1 #define PETSCMAT_DLL 2 3 #include "src/mat/impls/baij/mpi/mpibaij.h" /*I "petscmat.h" I*/ 4 5 EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat); 6 EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat); 7 EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,PetscInt,IS[],PetscInt); 8 EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat *[]); 9 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []); 10 EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode); 11 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 12 EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]); 13 EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]); 14 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar); 15 16 /* UGLY, ugly, ugly 17 When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does 18 not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and 19 inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ() 20 converts the entries into single precision and then calls ..._MatScalar() to put them 21 into the single precision data structures. 22 */ 23 #if defined(PETSC_USE_MAT_SINGLE) 24 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode); 25 EXTERN PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode); 26 EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode); 27 EXTERN PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode); 28 EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode); 29 #else 30 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 31 #define MatSetValues_MPIBAIJ_MatScalar MatSetValues_MPIBAIJ 32 #define MatSetValuesBlocked_MPIBAIJ_MatScalar MatSetValuesBlocked_MPIBAIJ 33 #define MatSetValues_MPIBAIJ_HT_MatScalar MatSetValues_MPIBAIJ_HT 34 #define MatSetValuesBlocked_MPIBAIJ_HT_MatScalar MatSetValuesBlocked_MPIBAIJ_HT 35 #endif 36 37 #undef __FUNCT__ 38 #define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ" 39 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[]) 40 { 41 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 42 PetscErrorCode ierr; 43 PetscInt i,*idxb = 0; 44 PetscScalar *va,*vb; 45 Vec vtmp; 46 47 PetscFunctionBegin; 48 49 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 50 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 51 if (idx) { 52 for (i=0; i<A->cmap.n; i++) {if (PetscAbsScalar(va[i])) idx[i] += A->cmap.rstart;} 53 } 54 55 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap.n,&vtmp);CHKERRQ(ierr); 56 if (idx) {ierr = PetscMalloc(A->rmap.n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);} 57 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 58 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 59 60 for (i=0; i<A->rmap.n; i++){ 61 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {va[i] = vb[i]; if (idx) idx[i] = A->cmap.bs*a->garray[idxb[i]/A->cmap.bs] + (idxb[i] % A->cmap.bs);} 62 } 63 64 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 65 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 66 if (idxb) {ierr = PetscFree(idxb);CHKERRQ(ierr);} 67 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 68 PetscFunctionReturn(0); 69 } 70 71 EXTERN_C_BEGIN 72 #undef __FUNCT__ 73 #define __FUNCT__ "MatStoreValues_MPIBAIJ" 74 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIBAIJ(Mat mat) 75 { 76 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 77 PetscErrorCode ierr; 78 79 PetscFunctionBegin; 80 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 81 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 82 PetscFunctionReturn(0); 83 } 84 EXTERN_C_END 85 86 EXTERN_C_BEGIN 87 #undef __FUNCT__ 88 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ" 89 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIBAIJ(Mat mat) 90 { 91 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 92 PetscErrorCode ierr; 93 94 PetscFunctionBegin; 95 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 96 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 97 PetscFunctionReturn(0); 98 } 99 EXTERN_C_END 100 101 /* 102 Local utility routine that creates a mapping from the global column 103 number to the local number in the off-diagonal part of the local 104 storage of the matrix. This is done in a non scable way since the 105 length of colmap equals the global matrix length. 106 */ 107 #undef __FUNCT__ 108 #define __FUNCT__ "CreateColmap_MPIBAIJ_Private" 109 PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat) 110 { 111 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 112 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 113 PetscErrorCode ierr; 114 PetscInt nbs = B->nbs,i,bs=mat->rmap.bs; 115 116 PetscFunctionBegin; 117 #if defined (PETSC_USE_CTABLE) 118 ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr); 119 for (i=0; i<nbs; i++){ 120 ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr); 121 } 122 #else 123 ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr); 124 ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 125 ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 126 for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1; 127 #endif 128 PetscFunctionReturn(0); 129 } 130 131 #define CHUNKSIZE 10 132 133 #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 134 { \ 135 \ 136 brow = row/bs; \ 137 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 138 rmax = aimax[brow]; nrow = ailen[brow]; \ 139 bcol = col/bs; \ 140 ridx = row % bs; cidx = col % bs; \ 141 low = 0; high = nrow; \ 142 while (high-low > 3) { \ 143 t = (low+high)/2; \ 144 if (rp[t] > bcol) high = t; \ 145 else low = t; \ 146 } \ 147 for (_i=low; _i<high; _i++) { \ 148 if (rp[_i] > bcol) break; \ 149 if (rp[_i] == bcol) { \ 150 bap = ap + bs2*_i + bs*cidx + ridx; \ 151 if (addv == ADD_VALUES) *bap += value; \ 152 else *bap = value; \ 153 goto a_noinsert; \ 154 } \ 155 } \ 156 if (a->nonew == 1) goto a_noinsert; \ 157 if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 158 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 159 N = nrow++ - 1; \ 160 /* shift up all the later entries in this row */ \ 161 for (ii=N; ii>=_i; ii--) { \ 162 rp[ii+1] = rp[ii]; \ 163 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 164 } \ 165 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 166 rp[_i] = bcol; \ 167 ap[bs2*_i + bs*cidx + ridx] = value; \ 168 a_noinsert:; \ 169 ailen[brow] = nrow; \ 170 } 171 172 #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 173 { \ 174 brow = row/bs; \ 175 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 176 rmax = bimax[brow]; nrow = bilen[brow]; \ 177 bcol = col/bs; \ 178 ridx = row % bs; cidx = col % bs; \ 179 low = 0; high = nrow; \ 180 while (high-low > 3) { \ 181 t = (low+high)/2; \ 182 if (rp[t] > bcol) high = t; \ 183 else low = t; \ 184 } \ 185 for (_i=low; _i<high; _i++) { \ 186 if (rp[_i] > bcol) break; \ 187 if (rp[_i] == bcol) { \ 188 bap = ap + bs2*_i + bs*cidx + ridx; \ 189 if (addv == ADD_VALUES) *bap += value; \ 190 else *bap = value; \ 191 goto b_noinsert; \ 192 } \ 193 } \ 194 if (b->nonew == 1) goto b_noinsert; \ 195 if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 196 MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 197 CHKMEMQ;\ 198 N = nrow++ - 1; \ 199 /* shift up all the later entries in this row */ \ 200 for (ii=N; ii>=_i; ii--) { \ 201 rp[ii+1] = rp[ii]; \ 202 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 203 } \ 204 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 205 rp[_i] = bcol; \ 206 ap[bs2*_i + bs*cidx + ridx] = value; \ 207 b_noinsert:; \ 208 bilen[brow] = nrow; \ 209 } 210 211 #if defined(PETSC_USE_MAT_SINGLE) 212 #undef __FUNCT__ 213 #define __FUNCT__ "MatSetValues_MPIBAIJ" 214 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 215 { 216 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 217 PetscErrorCode ierr; 218 PetscInt i,N = m*n; 219 MatScalar *vsingle; 220 221 PetscFunctionBegin; 222 if (N > b->setvalueslen) { 223 ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr); 224 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 225 b->setvalueslen = N; 226 } 227 vsingle = b->setvaluescopy; 228 229 for (i=0; i<N; i++) { 230 vsingle[i] = v[i]; 231 } 232 ierr = MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 233 PetscFunctionReturn(0); 234 } 235 236 #undef __FUNCT__ 237 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ" 238 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 239 { 240 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 241 PetscErrorCode ierr; 242 PetscInt i,N = m*n*b->bs2; 243 MatScalar *vsingle; 244 245 PetscFunctionBegin; 246 if (N > b->setvalueslen) { 247 ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr); 248 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 249 b->setvalueslen = N; 250 } 251 vsingle = b->setvaluescopy; 252 for (i=0; i<N; i++) { 253 vsingle[i] = v[i]; 254 } 255 ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 256 PetscFunctionReturn(0); 257 } 258 259 #undef __FUNCT__ 260 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT" 261 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 262 { 263 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 264 PetscErrorCode ierr; 265 PetscInt i,N = m*n; 266 MatScalar *vsingle; 267 268 PetscFunctionBegin; 269 if (N > b->setvalueslen) { 270 ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr); 271 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 272 b->setvalueslen = N; 273 } 274 vsingle = b->setvaluescopy; 275 for (i=0; i<N; i++) { 276 vsingle[i] = v[i]; 277 } 278 ierr = MatSetValues_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 279 PetscFunctionReturn(0); 280 } 281 282 #undef __FUNCT__ 283 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT" 284 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 285 { 286 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 287 PetscErrorCode ierr; 288 PetscInt i,N = m*n*b->bs2; 289 MatScalar *vsingle; 290 291 PetscFunctionBegin; 292 if (N > b->setvalueslen) { 293 ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr); 294 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 295 b->setvalueslen = N; 296 } 297 vsingle = b->setvaluescopy; 298 for (i=0; i<N; i++) { 299 vsingle[i] = v[i]; 300 } 301 ierr = MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 302 PetscFunctionReturn(0); 303 } 304 #endif 305 306 #undef __FUNCT__ 307 #define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar" 308 PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 309 { 310 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 311 MatScalar value; 312 PetscTruth roworiented = baij->roworiented; 313 PetscErrorCode ierr; 314 PetscInt i,j,row,col; 315 PetscInt rstart_orig=mat->rmap.rstart; 316 PetscInt rend_orig=mat->rmap.rend,cstart_orig=mat->cmap.rstart; 317 PetscInt cend_orig=mat->cmap.rend,bs=mat->rmap.bs; 318 319 /* Some Variables required in the macro */ 320 Mat A = baij->A; 321 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data; 322 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 323 MatScalar *aa=a->a; 324 325 Mat B = baij->B; 326 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 327 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 328 MatScalar *ba=b->a; 329 330 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 331 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 332 MatScalar *ap,*bap; 333 334 PetscFunctionBegin; 335 for (i=0; i<m; i++) { 336 if (im[i] < 0) continue; 337 #if defined(PETSC_USE_DEBUG) 338 if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1); 339 #endif 340 if (im[i] >= rstart_orig && im[i] < rend_orig) { 341 row = im[i] - rstart_orig; 342 for (j=0; j<n; j++) { 343 if (in[j] >= cstart_orig && in[j] < cend_orig){ 344 col = in[j] - cstart_orig; 345 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 346 MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 347 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 348 } else if (in[j] < 0) continue; 349 #if defined(PETSC_USE_DEBUG) 350 else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[i],mat->cmap.N-1);} 351 #endif 352 else { 353 if (mat->was_assembled) { 354 if (!baij->colmap) { 355 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 356 } 357 #if defined (PETSC_USE_CTABLE) 358 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 359 col = col - 1; 360 #else 361 col = baij->colmap[in[j]/bs] - 1; 362 #endif 363 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 364 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 365 col = in[j]; 366 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 367 B = baij->B; 368 b = (Mat_SeqBAIJ*)(B)->data; 369 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 370 ba=b->a; 371 } else col += in[j]%bs; 372 } else col = in[j]; 373 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 374 MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 375 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 376 } 377 } 378 } else { 379 if (!baij->donotstash) { 380 if (roworiented) { 381 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 382 } else { 383 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 384 } 385 } 386 } 387 } 388 PetscFunctionReturn(0); 389 } 390 391 #undef __FUNCT__ 392 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_MatScalar" 393 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 394 { 395 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 396 const MatScalar *value; 397 MatScalar *barray=baij->barray; 398 PetscTruth roworiented = baij->roworiented; 399 PetscErrorCode ierr; 400 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 401 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 402 PetscInt cend=baij->cendbs,bs=mat->rmap.bs,bs2=baij->bs2; 403 404 PetscFunctionBegin; 405 if(!barray) { 406 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 407 baij->barray = barray; 408 } 409 410 if (roworiented) { 411 stepval = (n-1)*bs; 412 } else { 413 stepval = (m-1)*bs; 414 } 415 for (i=0; i<m; i++) { 416 if (im[i] < 0) continue; 417 #if defined(PETSC_USE_DEBUG) 418 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 419 #endif 420 if (im[i] >= rstart && im[i] < rend) { 421 row = im[i] - rstart; 422 for (j=0; j<n; j++) { 423 /* If NumCol = 1 then a copy is not required */ 424 if ((roworiented) && (n == 1)) { 425 barray = (MatScalar*)v + i*bs2; 426 } else if((!roworiented) && (m == 1)) { 427 barray = (MatScalar*)v + j*bs2; 428 } else { /* Here a copy is required */ 429 if (roworiented) { 430 value = v + i*(stepval+bs)*bs + j*bs; 431 } else { 432 value = v + j*(stepval+bs)*bs + i*bs; 433 } 434 for (ii=0; ii<bs; ii++,value+=stepval) { 435 for (jj=0; jj<bs; jj++) { 436 *barray++ = *value++; 437 } 438 } 439 barray -=bs2; 440 } 441 442 if (in[j] >= cstart && in[j] < cend){ 443 col = in[j] - cstart; 444 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 445 } 446 else if (in[j] < 0) continue; 447 #if defined(PETSC_USE_DEBUG) 448 else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);} 449 #endif 450 else { 451 if (mat->was_assembled) { 452 if (!baij->colmap) { 453 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 454 } 455 456 #if defined(PETSC_USE_DEBUG) 457 #if defined (PETSC_USE_CTABLE) 458 { PetscInt data; 459 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 460 if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 461 } 462 #else 463 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 464 #endif 465 #endif 466 #if defined (PETSC_USE_CTABLE) 467 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 468 col = (col - 1)/bs; 469 #else 470 col = (baij->colmap[in[j]] - 1)/bs; 471 #endif 472 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 473 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 474 col = in[j]; 475 } 476 } 477 else col = in[j]; 478 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 479 } 480 } 481 } else { 482 if (!baij->donotstash) { 483 if (roworiented) { 484 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 485 } else { 486 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 487 } 488 } 489 } 490 } 491 PetscFunctionReturn(0); 492 } 493 494 #define HASH_KEY 0.6180339887 495 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp))) 496 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 497 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 498 #undef __FUNCT__ 499 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT_MatScalar" 500 PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 501 { 502 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 503 PetscTruth roworiented = baij->roworiented; 504 PetscErrorCode ierr; 505 PetscInt i,j,row,col; 506 PetscInt rstart_orig=mat->rmap.rstart; 507 PetscInt rend_orig=mat->rmap.rend,Nbs=baij->Nbs; 508 PetscInt h1,key,size=baij->ht_size,bs=mat->rmap.bs,*HT=baij->ht,idx; 509 PetscReal tmp; 510 MatScalar **HD = baij->hd,value; 511 #if defined(PETSC_USE_DEBUG) 512 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 513 #endif 514 515 PetscFunctionBegin; 516 517 for (i=0; i<m; i++) { 518 #if defined(PETSC_USE_DEBUG) 519 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 520 if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1); 521 #endif 522 row = im[i]; 523 if (row >= rstart_orig && row < rend_orig) { 524 for (j=0; j<n; j++) { 525 col = in[j]; 526 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 527 /* Look up PetscInto the Hash Table */ 528 key = (row/bs)*Nbs+(col/bs)+1; 529 h1 = HASH(size,key,tmp); 530 531 532 idx = h1; 533 #if defined(PETSC_USE_DEBUG) 534 insert_ct++; 535 total_ct++; 536 if (HT[idx] != key) { 537 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 538 if (idx == size) { 539 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 540 if (idx == h1) { 541 SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 542 } 543 } 544 } 545 #else 546 if (HT[idx] != key) { 547 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 548 if (idx == size) { 549 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 550 if (idx == h1) { 551 SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 552 } 553 } 554 } 555 #endif 556 /* A HASH table entry is found, so insert the values at the correct address */ 557 if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 558 else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 559 } 560 } else { 561 if (!baij->donotstash) { 562 if (roworiented) { 563 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 564 } else { 565 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 566 } 567 } 568 } 569 } 570 #if defined(PETSC_USE_DEBUG) 571 baij->ht_total_ct = total_ct; 572 baij->ht_insert_ct = insert_ct; 573 #endif 574 PetscFunctionReturn(0); 575 } 576 577 #undef __FUNCT__ 578 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT_MatScalar" 579 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 580 { 581 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 582 PetscTruth roworiented = baij->roworiented; 583 PetscErrorCode ierr; 584 PetscInt i,j,ii,jj,row,col; 585 PetscInt rstart=baij->rstartbs; 586 PetscInt rend=mat->rmap.rend,stepval,bs=mat->rmap.bs,bs2=baij->bs2,nbs2=n*bs2; 587 PetscInt h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 588 PetscReal tmp; 589 MatScalar **HD = baij->hd,*baij_a; 590 const MatScalar *v_t,*value; 591 #if defined(PETSC_USE_DEBUG) 592 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 593 #endif 594 595 PetscFunctionBegin; 596 597 if (roworiented) { 598 stepval = (n-1)*bs; 599 } else { 600 stepval = (m-1)*bs; 601 } 602 for (i=0; i<m; i++) { 603 #if defined(PETSC_USE_DEBUG) 604 if (im[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]); 605 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1); 606 #endif 607 row = im[i]; 608 v_t = v + i*nbs2; 609 if (row >= rstart && row < rend) { 610 for (j=0; j<n; j++) { 611 col = in[j]; 612 613 /* Look up into the Hash Table */ 614 key = row*Nbs+col+1; 615 h1 = HASH(size,key,tmp); 616 617 idx = h1; 618 #if defined(PETSC_USE_DEBUG) 619 total_ct++; 620 insert_ct++; 621 if (HT[idx] != key) { 622 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 623 if (idx == size) { 624 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 625 if (idx == h1) { 626 SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 627 } 628 } 629 } 630 #else 631 if (HT[idx] != key) { 632 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 633 if (idx == size) { 634 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 635 if (idx == h1) { 636 SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 637 } 638 } 639 } 640 #endif 641 baij_a = HD[idx]; 642 if (roworiented) { 643 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 644 /* value = v + (i*(stepval+bs)+j)*bs; */ 645 value = v_t; 646 v_t += bs; 647 if (addv == ADD_VALUES) { 648 for (ii=0; ii<bs; ii++,value+=stepval) { 649 for (jj=ii; jj<bs2; jj+=bs) { 650 baij_a[jj] += *value++; 651 } 652 } 653 } else { 654 for (ii=0; ii<bs; ii++,value+=stepval) { 655 for (jj=ii; jj<bs2; jj+=bs) { 656 baij_a[jj] = *value++; 657 } 658 } 659 } 660 } else { 661 value = v + j*(stepval+bs)*bs + i*bs; 662 if (addv == ADD_VALUES) { 663 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 664 for (jj=0; jj<bs; jj++) { 665 baij_a[jj] += *value++; 666 } 667 } 668 } else { 669 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 670 for (jj=0; jj<bs; jj++) { 671 baij_a[jj] = *value++; 672 } 673 } 674 } 675 } 676 } 677 } else { 678 if (!baij->donotstash) { 679 if (roworiented) { 680 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 681 } else { 682 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 683 } 684 } 685 } 686 } 687 #if defined(PETSC_USE_DEBUG) 688 baij->ht_total_ct = total_ct; 689 baij->ht_insert_ct = insert_ct; 690 #endif 691 PetscFunctionReturn(0); 692 } 693 694 #undef __FUNCT__ 695 #define __FUNCT__ "MatGetValues_MPIBAIJ" 696 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 697 { 698 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 699 PetscErrorCode ierr; 700 PetscInt bs=mat->rmap.bs,i,j,bsrstart = mat->rmap.rstart,bsrend = mat->rmap.rend; 701 PetscInt bscstart = mat->cmap.rstart,bscend = mat->cmap.rend,row,col,data; 702 703 PetscFunctionBegin; 704 for (i=0; i<m; i++) { 705 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 706 if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1); 707 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 708 row = idxm[i] - bsrstart; 709 for (j=0; j<n; j++) { 710 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 711 if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1); 712 if (idxn[j] >= bscstart && idxn[j] < bscend){ 713 col = idxn[j] - bscstart; 714 ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 715 } else { 716 if (!baij->colmap) { 717 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 718 } 719 #if defined (PETSC_USE_CTABLE) 720 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 721 data --; 722 #else 723 data = baij->colmap[idxn[j]/bs]-1; 724 #endif 725 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 726 else { 727 col = data + idxn[j]%bs; 728 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 729 } 730 } 731 } 732 } else { 733 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 734 } 735 } 736 PetscFunctionReturn(0); 737 } 738 739 #undef __FUNCT__ 740 #define __FUNCT__ "MatNorm_MPIBAIJ" 741 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm) 742 { 743 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 744 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data; 745 PetscErrorCode ierr; 746 PetscInt i,j,bs2=baij->bs2,bs=baij->A->rmap.bs,nz,row,col; 747 PetscReal sum = 0.0; 748 MatScalar *v; 749 750 PetscFunctionBegin; 751 if (baij->size == 1) { 752 ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr); 753 } else { 754 if (type == NORM_FROBENIUS) { 755 v = amat->a; 756 nz = amat->nz*bs2; 757 for (i=0; i<nz; i++) { 758 #if defined(PETSC_USE_COMPLEX) 759 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 760 #else 761 sum += (*v)*(*v); v++; 762 #endif 763 } 764 v = bmat->a; 765 nz = bmat->nz*bs2; 766 for (i=0; i<nz; i++) { 767 #if defined(PETSC_USE_COMPLEX) 768 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 769 #else 770 sum += (*v)*(*v); v++; 771 #endif 772 } 773 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 774 *nrm = sqrt(*nrm); 775 } else if (type == NORM_1) { /* max column sum */ 776 PetscReal *tmp,*tmp2; 777 PetscInt *jj,*garray=baij->garray,cstart=baij->rstartbs; 778 ierr = PetscMalloc((2*mat->cmap.N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 779 tmp2 = tmp + mat->cmap.N; 780 ierr = PetscMemzero(tmp,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr); 781 v = amat->a; jj = amat->j; 782 for (i=0; i<amat->nz; i++) { 783 for (j=0; j<bs; j++){ 784 col = bs*(cstart + *jj) + j; /* column index */ 785 for (row=0; row<bs; row++){ 786 tmp[col] += PetscAbsScalar(*v); v++; 787 } 788 } 789 jj++; 790 } 791 v = bmat->a; jj = bmat->j; 792 for (i=0; i<bmat->nz; i++) { 793 for (j=0; j<bs; j++){ 794 col = bs*garray[*jj] + j; 795 for (row=0; row<bs; row++){ 796 tmp[col] += PetscAbsScalar(*v); v++; 797 } 798 } 799 jj++; 800 } 801 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap.N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 802 *nrm = 0.0; 803 for (j=0; j<mat->cmap.N; j++) { 804 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 805 } 806 ierr = PetscFree(tmp);CHKERRQ(ierr); 807 } else if (type == NORM_INFINITY) { /* max row sum */ 808 PetscReal *sums; 809 ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr) 810 sum = 0.0; 811 for (j=0; j<amat->mbs; j++) { 812 for (row=0; row<bs; row++) sums[row] = 0.0; 813 v = amat->a + bs2*amat->i[j]; 814 nz = amat->i[j+1]-amat->i[j]; 815 for (i=0; i<nz; i++) { 816 for (col=0; col<bs; col++){ 817 for (row=0; row<bs; row++){ 818 sums[row] += PetscAbsScalar(*v); v++; 819 } 820 } 821 } 822 v = bmat->a + bs2*bmat->i[j]; 823 nz = bmat->i[j+1]-bmat->i[j]; 824 for (i=0; i<nz; i++) { 825 for (col=0; col<bs; col++){ 826 for (row=0; row<bs; row++){ 827 sums[row] += PetscAbsScalar(*v); v++; 828 } 829 } 830 } 831 for (row=0; row<bs; row++){ 832 if (sums[row] > sum) sum = sums[row]; 833 } 834 } 835 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr); 836 ierr = PetscFree(sums);CHKERRQ(ierr); 837 } else { 838 SETERRQ(PETSC_ERR_SUP,"No support for this norm yet"); 839 } 840 } 841 PetscFunctionReturn(0); 842 } 843 844 /* 845 Creates the hash table, and sets the table 846 This table is created only once. 847 If new entried need to be added to the matrix 848 then the hash table has to be destroyed and 849 recreated. 850 */ 851 #undef __FUNCT__ 852 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private" 853 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor) 854 { 855 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 856 Mat A = baij->A,B=baij->B; 857 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data; 858 PetscInt i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 859 PetscErrorCode ierr; 860 PetscInt size,bs2=baij->bs2,rstart=baij->rstartbs; 861 PetscInt cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs; 862 PetscInt *HT,key; 863 MatScalar **HD; 864 PetscReal tmp; 865 #if defined(PETSC_USE_INFO) 866 PetscInt ct=0,max=0; 867 #endif 868 869 PetscFunctionBegin; 870 baij->ht_size=(PetscInt)(factor*nz); 871 size = baij->ht_size; 872 873 if (baij->ht) { 874 PetscFunctionReturn(0); 875 } 876 877 /* Allocate Memory for Hash Table */ 878 ierr = PetscMalloc((size)*(sizeof(PetscInt)+sizeof(MatScalar*))+1,&baij->hd);CHKERRQ(ierr); 879 baij->ht = (PetscInt*)(baij->hd + size); 880 HD = baij->hd; 881 HT = baij->ht; 882 883 884 ierr = PetscMemzero(HD,size*(sizeof(PetscInt)+sizeof(PetscScalar*)));CHKERRQ(ierr); 885 886 887 /* Loop Over A */ 888 for (i=0; i<a->mbs; i++) { 889 for (j=ai[i]; j<ai[i+1]; j++) { 890 row = i+rstart; 891 col = aj[j]+cstart; 892 893 key = row*Nbs + col + 1; 894 h1 = HASH(size,key,tmp); 895 for (k=0; k<size; k++){ 896 if (!HT[(h1+k)%size]) { 897 HT[(h1+k)%size] = key; 898 HD[(h1+k)%size] = a->a + j*bs2; 899 break; 900 #if defined(PETSC_USE_INFO) 901 } else { 902 ct++; 903 #endif 904 } 905 } 906 #if defined(PETSC_USE_INFO) 907 if (k> max) max = k; 908 #endif 909 } 910 } 911 /* Loop Over B */ 912 for (i=0; i<b->mbs; i++) { 913 for (j=bi[i]; j<bi[i+1]; j++) { 914 row = i+rstart; 915 col = garray[bj[j]]; 916 key = row*Nbs + col + 1; 917 h1 = HASH(size,key,tmp); 918 for (k=0; k<size; k++){ 919 if (!HT[(h1+k)%size]) { 920 HT[(h1+k)%size] = key; 921 HD[(h1+k)%size] = b->a + j*bs2; 922 break; 923 #if defined(PETSC_USE_INFO) 924 } else { 925 ct++; 926 #endif 927 } 928 } 929 #if defined(PETSC_USE_INFO) 930 if (k> max) max = k; 931 #endif 932 } 933 } 934 935 /* Print Summary */ 936 #if defined(PETSC_USE_INFO) 937 for (i=0,j=0; i<size; i++) { 938 if (HT[i]) {j++;} 939 } 940 ierr = PetscInfo2(0,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr); 941 #endif 942 PetscFunctionReturn(0); 943 } 944 945 #undef __FUNCT__ 946 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ" 947 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 948 { 949 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 950 PetscErrorCode ierr; 951 PetscInt nstash,reallocs; 952 InsertMode addv; 953 954 PetscFunctionBegin; 955 if (baij->donotstash) { 956 PetscFunctionReturn(0); 957 } 958 959 /* make sure all processors are either in INSERTMODE or ADDMODE */ 960 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 961 if (addv == (ADD_VALUES|INSERT_VALUES)) { 962 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 963 } 964 mat->insertmode = addv; /* in case this processor had no cache */ 965 966 ierr = MatStashScatterBegin_Private(&mat->stash,mat->rmap.range);CHKERRQ(ierr); 967 ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rangebs);CHKERRQ(ierr); 968 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 969 ierr = PetscInfo2(0,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 970 ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr); 971 ierr = PetscInfo2(0,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 972 PetscFunctionReturn(0); 973 } 974 975 #undef __FUNCT__ 976 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ" 977 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 978 { 979 Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data; 980 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data; 981 PetscErrorCode ierr; 982 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 983 PetscInt *row,*col,other_disassembled; 984 PetscTruth r1,r2,r3; 985 MatScalar *val; 986 InsertMode addv = mat->insertmode; 987 PetscMPIInt n; 988 989 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 990 PetscFunctionBegin; 991 if (!baij->donotstash) { 992 while (1) { 993 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 994 if (!flg) break; 995 996 for (i=0; i<n;) { 997 /* Now identify the consecutive vals belonging to the same row */ 998 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 999 if (j < n) ncols = j-i; 1000 else ncols = n-i; 1001 /* Now assemble all these values with a single function call */ 1002 ierr = MatSetValues_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 1003 i = j; 1004 } 1005 } 1006 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 1007 /* Now process the block-stash. Since the values are stashed column-oriented, 1008 set the roworiented flag to column oriented, and after MatSetValues() 1009 restore the original flags */ 1010 r1 = baij->roworiented; 1011 r2 = a->roworiented; 1012 r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented; 1013 baij->roworiented = PETSC_FALSE; 1014 a->roworiented = PETSC_FALSE; 1015 (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */ 1016 while (1) { 1017 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 1018 if (!flg) break; 1019 1020 for (i=0; i<n;) { 1021 /* Now identify the consecutive vals belonging to the same row */ 1022 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 1023 if (j < n) ncols = j-i; 1024 else ncols = n-i; 1025 ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 1026 i = j; 1027 } 1028 } 1029 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 1030 baij->roworiented = r1; 1031 a->roworiented = r2; 1032 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */ 1033 } 1034 1035 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 1036 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 1037 1038 /* determine if any processor has disassembled, if so we must 1039 also disassemble ourselfs, in order that we may reassemble. */ 1040 /* 1041 if nonzero structure of submatrix B cannot change then we know that 1042 no processor disassembled thus we can skip this stuff 1043 */ 1044 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 1045 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 1046 if (mat->was_assembled && !other_disassembled) { 1047 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 1048 } 1049 } 1050 1051 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 1052 ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr); 1053 } 1054 ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */ 1055 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 1056 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 1057 1058 #if defined(PETSC_USE_INFO) 1059 if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 1060 ierr = PetscInfo1(0,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr); 1061 baij->ht_total_ct = 0; 1062 baij->ht_insert_ct = 0; 1063 } 1064 #endif 1065 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 1066 ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr); 1067 mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 1068 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 1069 } 1070 1071 ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr); 1072 baij->rowvalues = 0; 1073 PetscFunctionReturn(0); 1074 } 1075 1076 #undef __FUNCT__ 1077 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket" 1078 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1079 { 1080 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1081 PetscErrorCode ierr; 1082 PetscMPIInt size = baij->size,rank = baij->rank; 1083 PetscInt bs = mat->rmap.bs; 1084 PetscTruth iascii,isdraw; 1085 PetscViewer sviewer; 1086 PetscViewerFormat format; 1087 1088 PetscFunctionBegin; 1089 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1090 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1091 if (iascii) { 1092 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1093 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1094 MatInfo info; 1095 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 1096 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1097 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n", 1098 rank,mat->rmap.N,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs, 1099 mat->rmap.bs,(PetscInt)info.memory);CHKERRQ(ierr); 1100 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1101 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr); 1102 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1103 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr); 1104 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1105 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1106 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 1107 PetscFunctionReturn(0); 1108 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1109 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1110 PetscFunctionReturn(0); 1111 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1112 PetscFunctionReturn(0); 1113 } 1114 } 1115 1116 if (isdraw) { 1117 PetscDraw draw; 1118 PetscTruth isnull; 1119 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1120 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1121 } 1122 1123 if (size == 1) { 1124 ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr); 1125 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 1126 } else { 1127 /* assemble the entire matrix onto first processor. */ 1128 Mat A; 1129 Mat_SeqBAIJ *Aloc; 1130 PetscInt M = mat->rmap.N,N = mat->cmap.N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 1131 MatScalar *a; 1132 1133 /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */ 1134 /* Perhaps this should be the type of mat? */ 1135 ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr); 1136 if (!rank) { 1137 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1138 } else { 1139 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1140 } 1141 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 1142 ierr = MatMPIBAIJSetPreallocation(A,mat->rmap.bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1143 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1144 1145 /* copy over the A part */ 1146 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1147 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1148 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1149 1150 for (i=0; i<mbs; i++) { 1151 rvals[0] = bs*(baij->rstartbs + i); 1152 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1153 for (j=ai[i]; j<ai[i+1]; j++) { 1154 col = (baij->cstartbs+aj[j])*bs; 1155 for (k=0; k<bs; k++) { 1156 ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1157 col++; a += bs; 1158 } 1159 } 1160 } 1161 /* copy over the B part */ 1162 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1163 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1164 for (i=0; i<mbs; i++) { 1165 rvals[0] = bs*(baij->rstartbs + i); 1166 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1167 for (j=ai[i]; j<ai[i+1]; j++) { 1168 col = baij->garray[aj[j]]*bs; 1169 for (k=0; k<bs; k++) { 1170 ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1171 col++; a += bs; 1172 } 1173 } 1174 } 1175 ierr = PetscFree(rvals);CHKERRQ(ierr); 1176 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1177 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1178 /* 1179 Everyone has to call to draw the matrix since the graphics waits are 1180 synchronized across all processors that share the PetscDraw object 1181 */ 1182 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1183 if (!rank) { 1184 ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 1185 ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1186 } 1187 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1188 ierr = MatDestroy(A);CHKERRQ(ierr); 1189 } 1190 PetscFunctionReturn(0); 1191 } 1192 1193 #undef __FUNCT__ 1194 #define __FUNCT__ "MatView_MPIBAIJ" 1195 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer) 1196 { 1197 PetscErrorCode ierr; 1198 PetscTruth iascii,isdraw,issocket,isbinary; 1199 1200 PetscFunctionBegin; 1201 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1202 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1203 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 1204 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1205 if (iascii || isdraw || issocket || isbinary) { 1206 ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1207 } else { 1208 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name); 1209 } 1210 PetscFunctionReturn(0); 1211 } 1212 1213 #undef __FUNCT__ 1214 #define __FUNCT__ "MatDestroy_MPIBAIJ" 1215 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat) 1216 { 1217 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1218 PetscErrorCode ierr; 1219 1220 PetscFunctionBegin; 1221 #if defined(PETSC_USE_LOG) 1222 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap.N,mat->cmap.N); 1223 #endif 1224 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1225 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 1226 ierr = MatDestroy(baij->A);CHKERRQ(ierr); 1227 ierr = MatDestroy(baij->B);CHKERRQ(ierr); 1228 #if defined (PETSC_USE_CTABLE) 1229 if (baij->colmap) {ierr = PetscTableDestroy(baij->colmap);CHKERRQ(ierr);} 1230 #else 1231 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 1232 #endif 1233 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 1234 if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 1235 if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 1236 ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr); 1237 ierr = PetscFree(baij->barray);CHKERRQ(ierr); 1238 ierr = PetscFree(baij->hd);CHKERRQ(ierr); 1239 #if defined(PETSC_USE_MAT_SINGLE) 1240 ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr); 1241 #endif 1242 ierr = PetscFree(baij->rangebs);CHKERRQ(ierr); 1243 ierr = PetscFree(baij);CHKERRQ(ierr); 1244 1245 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1246 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1247 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1248 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 1249 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1250 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1251 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 1252 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr); 1253 PetscFunctionReturn(0); 1254 } 1255 1256 #undef __FUNCT__ 1257 #define __FUNCT__ "MatMult_MPIBAIJ" 1258 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1259 { 1260 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1261 PetscErrorCode ierr; 1262 PetscInt nt; 1263 1264 PetscFunctionBegin; 1265 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1266 if (nt != A->cmap.n) { 1267 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1268 } 1269 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1270 if (nt != A->rmap.n) { 1271 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 1272 } 1273 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1274 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1275 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1276 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1277 PetscFunctionReturn(0); 1278 } 1279 1280 #undef __FUNCT__ 1281 #define __FUNCT__ "MatMultAdd_MPIBAIJ" 1282 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1283 { 1284 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1285 PetscErrorCode ierr; 1286 1287 PetscFunctionBegin; 1288 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1289 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1290 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1291 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1292 PetscFunctionReturn(0); 1293 } 1294 1295 #undef __FUNCT__ 1296 #define __FUNCT__ "MatMultTranspose_MPIBAIJ" 1297 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy) 1298 { 1299 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1300 PetscErrorCode ierr; 1301 PetscTruth merged; 1302 1303 PetscFunctionBegin; 1304 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1305 /* do nondiagonal part */ 1306 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1307 if (!merged) { 1308 /* send it on its way */ 1309 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1310 /* do local part */ 1311 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1312 /* receive remote parts: note this assumes the values are not actually */ 1313 /* inserted in yy until the next line */ 1314 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1315 } else { 1316 /* do local part */ 1317 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1318 /* send it on its way */ 1319 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1320 /* values actually were received in the Begin() but we need to call this nop */ 1321 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1322 } 1323 PetscFunctionReturn(0); 1324 } 1325 1326 #undef __FUNCT__ 1327 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ" 1328 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1329 { 1330 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1331 PetscErrorCode ierr; 1332 1333 PetscFunctionBegin; 1334 /* do nondiagonal part */ 1335 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1336 /* send it on its way */ 1337 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1338 /* do local part */ 1339 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1340 /* receive remote parts: note this assumes the values are not actually */ 1341 /* inserted in yy until the next line, which is true for my implementation*/ 1342 /* but is not perhaps always true. */ 1343 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1344 PetscFunctionReturn(0); 1345 } 1346 1347 /* 1348 This only works correctly for square matrices where the subblock A->A is the 1349 diagonal block 1350 */ 1351 #undef __FUNCT__ 1352 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ" 1353 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1354 { 1355 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1356 PetscErrorCode ierr; 1357 1358 PetscFunctionBegin; 1359 if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1360 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1361 PetscFunctionReturn(0); 1362 } 1363 1364 #undef __FUNCT__ 1365 #define __FUNCT__ "MatScale_MPIBAIJ" 1366 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa) 1367 { 1368 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1369 PetscErrorCode ierr; 1370 1371 PetscFunctionBegin; 1372 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1373 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1374 PetscFunctionReturn(0); 1375 } 1376 1377 #undef __FUNCT__ 1378 #define __FUNCT__ "MatGetRow_MPIBAIJ" 1379 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1380 { 1381 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1382 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1383 PetscErrorCode ierr; 1384 PetscInt bs = matin->rmap.bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1385 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap.rstart,brend = matin->rmap.rend; 1386 PetscInt *cmap,*idx_p,cstart = mat->cstartbs; 1387 1388 PetscFunctionBegin; 1389 if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1390 mat->getrowactive = PETSC_TRUE; 1391 1392 if (!mat->rowvalues && (idx || v)) { 1393 /* 1394 allocate enough space to hold information from the longest row. 1395 */ 1396 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data; 1397 PetscInt max = 1,mbs = mat->mbs,tmp; 1398 for (i=0; i<mbs; i++) { 1399 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1400 if (max < tmp) { max = tmp; } 1401 } 1402 ierr = PetscMalloc(max*bs2*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1403 mat->rowindices = (PetscInt*)(mat->rowvalues + max*bs2); 1404 } 1405 1406 if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows") 1407 lrow = row - brstart; 1408 1409 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1410 if (!v) {pvA = 0; pvB = 0;} 1411 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1412 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1413 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1414 nztot = nzA + nzB; 1415 1416 cmap = mat->garray; 1417 if (v || idx) { 1418 if (nztot) { 1419 /* Sort by increasing column numbers, assuming A and B already sorted */ 1420 PetscInt imark = -1; 1421 if (v) { 1422 *v = v_p = mat->rowvalues; 1423 for (i=0; i<nzB; i++) { 1424 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1425 else break; 1426 } 1427 imark = i; 1428 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1429 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1430 } 1431 if (idx) { 1432 *idx = idx_p = mat->rowindices; 1433 if (imark > -1) { 1434 for (i=0; i<imark; i++) { 1435 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1436 } 1437 } else { 1438 for (i=0; i<nzB; i++) { 1439 if (cmap[cworkB[i]/bs] < cstart) 1440 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1441 else break; 1442 } 1443 imark = i; 1444 } 1445 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1446 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1447 } 1448 } else { 1449 if (idx) *idx = 0; 1450 if (v) *v = 0; 1451 } 1452 } 1453 *nz = nztot; 1454 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1455 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1456 PetscFunctionReturn(0); 1457 } 1458 1459 #undef __FUNCT__ 1460 #define __FUNCT__ "MatRestoreRow_MPIBAIJ" 1461 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1462 { 1463 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1464 1465 PetscFunctionBegin; 1466 if (!baij->getrowactive) { 1467 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1468 } 1469 baij->getrowactive = PETSC_FALSE; 1470 PetscFunctionReturn(0); 1471 } 1472 1473 #undef __FUNCT__ 1474 #define __FUNCT__ "MatZeroEntries_MPIBAIJ" 1475 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A) 1476 { 1477 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1478 PetscErrorCode ierr; 1479 1480 PetscFunctionBegin; 1481 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1482 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1483 PetscFunctionReturn(0); 1484 } 1485 1486 #undef __FUNCT__ 1487 #define __FUNCT__ "MatGetInfo_MPIBAIJ" 1488 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1489 { 1490 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data; 1491 Mat A = a->A,B = a->B; 1492 PetscErrorCode ierr; 1493 PetscReal isend[5],irecv[5]; 1494 1495 PetscFunctionBegin; 1496 info->block_size = (PetscReal)matin->rmap.bs; 1497 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1498 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1499 isend[3] = info->memory; isend[4] = info->mallocs; 1500 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1501 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1502 isend[3] += info->memory; isend[4] += info->mallocs; 1503 if (flag == MAT_LOCAL) { 1504 info->nz_used = isend[0]; 1505 info->nz_allocated = isend[1]; 1506 info->nz_unneeded = isend[2]; 1507 info->memory = isend[3]; 1508 info->mallocs = isend[4]; 1509 } else if (flag == MAT_GLOBAL_MAX) { 1510 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1511 info->nz_used = irecv[0]; 1512 info->nz_allocated = irecv[1]; 1513 info->nz_unneeded = irecv[2]; 1514 info->memory = irecv[3]; 1515 info->mallocs = irecv[4]; 1516 } else if (flag == MAT_GLOBAL_SUM) { 1517 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1518 info->nz_used = irecv[0]; 1519 info->nz_allocated = irecv[1]; 1520 info->nz_unneeded = irecv[2]; 1521 info->memory = irecv[3]; 1522 info->mallocs = irecv[4]; 1523 } else { 1524 SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1525 } 1526 info->rows_global = (PetscReal)A->rmap.N; 1527 info->columns_global = (PetscReal)A->cmap.N; 1528 info->rows_local = (PetscReal)A->rmap.N; 1529 info->columns_local = (PetscReal)A->cmap.N; 1530 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1531 info->fill_ratio_needed = 0; 1532 info->factor_mallocs = 0; 1533 PetscFunctionReturn(0); 1534 } 1535 1536 #undef __FUNCT__ 1537 #define __FUNCT__ "MatSetOption_MPIBAIJ" 1538 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscTruth flg) 1539 { 1540 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1541 PetscErrorCode ierr; 1542 1543 PetscFunctionBegin; 1544 switch (op) { 1545 case MAT_NO_NEW_NONZERO_LOCATIONS: 1546 case MAT_COLUMNS_SORTED: 1547 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1548 case MAT_KEEP_ZEROED_ROWS: 1549 case MAT_NEW_NONZERO_LOCATION_ERR: 1550 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1551 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1552 break; 1553 case MAT_ROW_ORIENTED: 1554 a->roworiented = flg; 1555 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1556 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1557 break; 1558 case MAT_ROWS_SORTED: 1559 case MAT_NEW_DIAGONALS: 1560 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1561 break; 1562 case MAT_IGNORE_OFF_PROC_ENTRIES: 1563 a->donotstash = flg; 1564 break; 1565 case MAT_USE_HASH_TABLE: 1566 a->ht_flag = flg; 1567 break; 1568 case MAT_SYMMETRIC: 1569 case MAT_STRUCTURALLY_SYMMETRIC: 1570 case MAT_HERMITIAN: 1571 case MAT_SYMMETRY_ETERNAL: 1572 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1573 break; 1574 default: 1575 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 1576 } 1577 PetscFunctionReturn(0); 1578 } 1579 1580 #undef __FUNCT__ 1581 #define __FUNCT__ "MatTranspose_MPIBAIJ(" 1582 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,Mat *matout) 1583 { 1584 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 1585 Mat_SeqBAIJ *Aloc; 1586 Mat B; 1587 PetscErrorCode ierr; 1588 PetscInt M=A->rmap.N,N=A->cmap.N,*ai,*aj,i,*rvals,j,k,col; 1589 PetscInt bs=A->rmap.bs,mbs=baij->mbs; 1590 MatScalar *a; 1591 1592 PetscFunctionBegin; 1593 if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1594 ierr = MatCreate(A->comm,&B);CHKERRQ(ierr); 1595 ierr = MatSetSizes(B,A->cmap.n,A->rmap.n,N,M);CHKERRQ(ierr); 1596 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 1597 ierr = MatMPIBAIJSetPreallocation(B,A->rmap.bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1598 1599 /* copy over the A part */ 1600 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1601 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1602 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1603 1604 for (i=0; i<mbs; i++) { 1605 rvals[0] = bs*(baij->rstartbs + i); 1606 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1607 for (j=ai[i]; j<ai[i+1]; j++) { 1608 col = (baij->cstartbs+aj[j])*bs; 1609 for (k=0; k<bs; k++) { 1610 ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1611 col++; a += bs; 1612 } 1613 } 1614 } 1615 /* copy over the B part */ 1616 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1617 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1618 for (i=0; i<mbs; i++) { 1619 rvals[0] = bs*(baij->rstartbs + i); 1620 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1621 for (j=ai[i]; j<ai[i+1]; j++) { 1622 col = baij->garray[aj[j]]*bs; 1623 for (k=0; k<bs; k++) { 1624 ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1625 col++; a += bs; 1626 } 1627 } 1628 } 1629 ierr = PetscFree(rvals);CHKERRQ(ierr); 1630 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1631 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1632 1633 if (matout) { 1634 *matout = B; 1635 } else { 1636 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1637 } 1638 PetscFunctionReturn(0); 1639 } 1640 1641 #undef __FUNCT__ 1642 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ" 1643 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 1644 { 1645 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1646 Mat a = baij->A,b = baij->B; 1647 PetscErrorCode ierr; 1648 PetscInt s1,s2,s3; 1649 1650 PetscFunctionBegin; 1651 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1652 if (rr) { 1653 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1654 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1655 /* Overlap communication with computation. */ 1656 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1657 } 1658 if (ll) { 1659 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1660 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1661 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1662 } 1663 /* scale the diagonal block */ 1664 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1665 1666 if (rr) { 1667 /* Do a scatter end and then right scale the off-diagonal block */ 1668 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1669 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1670 } 1671 1672 PetscFunctionReturn(0); 1673 } 1674 1675 #undef __FUNCT__ 1676 #define __FUNCT__ "MatZeroRows_MPIBAIJ" 1677 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag) 1678 { 1679 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1680 PetscErrorCode ierr; 1681 PetscMPIInt imdex,size = l->size,n,rank = l->rank; 1682 PetscInt i,*owners = A->rmap.range; 1683 PetscInt *nprocs,j,idx,nsends,row; 1684 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 1685 PetscInt *rvalues,tag = A->tag,count,base,slen,*source,lastidx = -1; 1686 PetscInt *lens,*lrows,*values,rstart_bs=A->rmap.rstart; 1687 MPI_Comm comm = A->comm; 1688 MPI_Request *send_waits,*recv_waits; 1689 MPI_Status recv_status,*send_status; 1690 #if defined(PETSC_DEBUG) 1691 PetscTruth found = PETSC_FALSE; 1692 #endif 1693 1694 PetscFunctionBegin; 1695 /* first count number of contributors to each processor */ 1696 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 1697 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 1698 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 1699 j = 0; 1700 for (i=0; i<N; i++) { 1701 if (lastidx > (idx = rows[i])) j = 0; 1702 lastidx = idx; 1703 for (; j<size; j++) { 1704 if (idx >= owners[j] && idx < owners[j+1]) { 1705 nprocs[2*j]++; 1706 nprocs[2*j+1] = 1; 1707 owner[i] = j; 1708 #if defined(PETSC_DEBUG) 1709 found = PETSC_TRUE; 1710 #endif 1711 break; 1712 } 1713 } 1714 #if defined(PETSC_DEBUG) 1715 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 1716 found = PETSC_FALSE; 1717 #endif 1718 } 1719 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 1720 1721 /* inform other processors of number of messages and max length*/ 1722 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 1723 1724 /* post receives: */ 1725 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 1726 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 1727 for (i=0; i<nrecvs; i++) { 1728 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 1729 } 1730 1731 /* do sends: 1732 1) starts[i] gives the starting index in svalues for stuff going to 1733 the ith processor 1734 */ 1735 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 1736 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 1737 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 1738 starts[0] = 0; 1739 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1740 for (i=0; i<N; i++) { 1741 svalues[starts[owner[i]]++] = rows[i]; 1742 } 1743 1744 starts[0] = 0; 1745 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1746 count = 0; 1747 for (i=0; i<size; i++) { 1748 if (nprocs[2*i+1]) { 1749 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1750 } 1751 } 1752 ierr = PetscFree(starts);CHKERRQ(ierr); 1753 1754 base = owners[rank]; 1755 1756 /* wait on receives */ 1757 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 1758 source = lens + nrecvs; 1759 count = nrecvs; slen = 0; 1760 while (count) { 1761 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1762 /* unpack receives into our local space */ 1763 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 1764 source[imdex] = recv_status.MPI_SOURCE; 1765 lens[imdex] = n; 1766 slen += n; 1767 count--; 1768 } 1769 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1770 1771 /* move the data into the send scatter */ 1772 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 1773 count = 0; 1774 for (i=0; i<nrecvs; i++) { 1775 values = rvalues + i*nmax; 1776 for (j=0; j<lens[i]; j++) { 1777 lrows[count++] = values[j] - base; 1778 } 1779 } 1780 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1781 ierr = PetscFree(lens);CHKERRQ(ierr); 1782 ierr = PetscFree(owner);CHKERRQ(ierr); 1783 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1784 1785 /* actually zap the local rows */ 1786 /* 1787 Zero the required rows. If the "diagonal block" of the matrix 1788 is square and the user wishes to set the diagonal we use separate 1789 code so that MatSetValues() is not called for each diagonal allocating 1790 new memory, thus calling lots of mallocs and slowing things down. 1791 1792 Contributed by: Matthew Knepley 1793 */ 1794 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1795 ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0);CHKERRQ(ierr); 1796 if ((diag != 0.0) && (l->A->rmap.N == l->A->cmap.N)) { 1797 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag);CHKERRQ(ierr); 1798 } else if (diag != 0.0) { 1799 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr); 1800 if (((Mat_SeqBAIJ*)l->A->data)->nonew) { 1801 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1802 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1803 } 1804 for (i=0; i<slen; i++) { 1805 row = lrows[i] + rstart_bs; 1806 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 1807 } 1808 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1809 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1810 } else { 1811 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr); 1812 } 1813 1814 ierr = PetscFree(lrows);CHKERRQ(ierr); 1815 1816 /* wait on sends */ 1817 if (nsends) { 1818 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1819 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1820 ierr = PetscFree(send_status);CHKERRQ(ierr); 1821 } 1822 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1823 ierr = PetscFree(svalues);CHKERRQ(ierr); 1824 1825 PetscFunctionReturn(0); 1826 } 1827 1828 #undef __FUNCT__ 1829 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ" 1830 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1831 { 1832 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1833 PetscErrorCode ierr; 1834 1835 PetscFunctionBegin; 1836 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1837 PetscFunctionReturn(0); 1838 } 1839 1840 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *); 1841 1842 #undef __FUNCT__ 1843 #define __FUNCT__ "MatEqual_MPIBAIJ" 1844 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag) 1845 { 1846 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 1847 Mat a,b,c,d; 1848 PetscTruth flg; 1849 PetscErrorCode ierr; 1850 1851 PetscFunctionBegin; 1852 a = matA->A; b = matA->B; 1853 c = matB->A; d = matB->B; 1854 1855 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1856 if (flg) { 1857 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1858 } 1859 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1860 PetscFunctionReturn(0); 1861 } 1862 1863 #undef __FUNCT__ 1864 #define __FUNCT__ "MatCopy_MPIBAIJ" 1865 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str) 1866 { 1867 PetscErrorCode ierr; 1868 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1869 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 1870 1871 PetscFunctionBegin; 1872 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1873 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1874 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1875 } else { 1876 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1877 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1878 } 1879 PetscFunctionReturn(0); 1880 } 1881 1882 #undef __FUNCT__ 1883 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ" 1884 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A) 1885 { 1886 PetscErrorCode ierr; 1887 1888 PetscFunctionBegin; 1889 ierr = MatMPIBAIJSetPreallocation(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1890 PetscFunctionReturn(0); 1891 } 1892 1893 #include "petscblaslapack.h" 1894 #undef __FUNCT__ 1895 #define __FUNCT__ "MatAXPY_MPIBAIJ" 1896 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1897 { 1898 PetscErrorCode ierr; 1899 Mat_MPIBAIJ *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data; 1900 PetscBLASInt bnz,one=1; 1901 Mat_SeqBAIJ *x,*y; 1902 1903 PetscFunctionBegin; 1904 if (str == SAME_NONZERO_PATTERN) { 1905 PetscScalar alpha = a; 1906 x = (Mat_SeqBAIJ *)xx->A->data; 1907 y = (Mat_SeqBAIJ *)yy->A->data; 1908 bnz = (PetscBLASInt)x->nz; 1909 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1910 x = (Mat_SeqBAIJ *)xx->B->data; 1911 y = (Mat_SeqBAIJ *)yy->B->data; 1912 bnz = (PetscBLASInt)x->nz; 1913 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1914 } else { 1915 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1916 } 1917 PetscFunctionReturn(0); 1918 } 1919 1920 #undef __FUNCT__ 1921 #define __FUNCT__ "MatRealPart_MPIBAIJ" 1922 PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 1923 { 1924 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1925 PetscErrorCode ierr; 1926 1927 PetscFunctionBegin; 1928 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1929 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1930 PetscFunctionReturn(0); 1931 } 1932 1933 #undef __FUNCT__ 1934 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ" 1935 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 1936 { 1937 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1938 PetscErrorCode ierr; 1939 1940 PetscFunctionBegin; 1941 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1942 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1943 PetscFunctionReturn(0); 1944 } 1945 1946 /* -------------------------------------------------------------------*/ 1947 static struct _MatOps MatOps_Values = { 1948 MatSetValues_MPIBAIJ, 1949 MatGetRow_MPIBAIJ, 1950 MatRestoreRow_MPIBAIJ, 1951 MatMult_MPIBAIJ, 1952 /* 4*/ MatMultAdd_MPIBAIJ, 1953 MatMultTranspose_MPIBAIJ, 1954 MatMultTransposeAdd_MPIBAIJ, 1955 0, 1956 0, 1957 0, 1958 /*10*/ 0, 1959 0, 1960 0, 1961 0, 1962 MatTranspose_MPIBAIJ, 1963 /*15*/ MatGetInfo_MPIBAIJ, 1964 MatEqual_MPIBAIJ, 1965 MatGetDiagonal_MPIBAIJ, 1966 MatDiagonalScale_MPIBAIJ, 1967 MatNorm_MPIBAIJ, 1968 /*20*/ MatAssemblyBegin_MPIBAIJ, 1969 MatAssemblyEnd_MPIBAIJ, 1970 0, 1971 MatSetOption_MPIBAIJ, 1972 MatZeroEntries_MPIBAIJ, 1973 /*25*/ MatZeroRows_MPIBAIJ, 1974 0, 1975 0, 1976 0, 1977 0, 1978 /*30*/ MatSetUpPreallocation_MPIBAIJ, 1979 0, 1980 0, 1981 0, 1982 0, 1983 /*35*/ MatDuplicate_MPIBAIJ, 1984 0, 1985 0, 1986 0, 1987 0, 1988 /*40*/ MatAXPY_MPIBAIJ, 1989 MatGetSubMatrices_MPIBAIJ, 1990 MatIncreaseOverlap_MPIBAIJ, 1991 MatGetValues_MPIBAIJ, 1992 MatCopy_MPIBAIJ, 1993 /*45*/ 0, 1994 MatScale_MPIBAIJ, 1995 0, 1996 0, 1997 0, 1998 /*50*/ 0, 1999 0, 2000 0, 2001 0, 2002 0, 2003 /*55*/ 0, 2004 0, 2005 MatSetUnfactored_MPIBAIJ, 2006 0, 2007 MatSetValuesBlocked_MPIBAIJ, 2008 /*60*/ 0, 2009 MatDestroy_MPIBAIJ, 2010 MatView_MPIBAIJ, 2011 0, 2012 0, 2013 /*65*/ 0, 2014 0, 2015 0, 2016 0, 2017 0, 2018 /*70*/ MatGetRowMaxAbs_MPIBAIJ, 2019 0, 2020 0, 2021 0, 2022 0, 2023 /*75*/ 0, 2024 0, 2025 0, 2026 0, 2027 0, 2028 /*80*/ 0, 2029 0, 2030 0, 2031 0, 2032 MatLoad_MPIBAIJ, 2033 /*85*/ 0, 2034 0, 2035 0, 2036 0, 2037 0, 2038 /*90*/ 0, 2039 0, 2040 0, 2041 0, 2042 0, 2043 /*95*/ 0, 2044 0, 2045 0, 2046 0, 2047 0, 2048 /*100*/0, 2049 0, 2050 0, 2051 0, 2052 0, 2053 /*105*/0, 2054 MatRealPart_MPIBAIJ, 2055 MatImaginaryPart_MPIBAIJ}; 2056 2057 2058 EXTERN_C_BEGIN 2059 #undef __FUNCT__ 2060 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ" 2061 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 2062 { 2063 PetscFunctionBegin; 2064 *a = ((Mat_MPIBAIJ *)A->data)->A; 2065 *iscopy = PETSC_FALSE; 2066 PetscFunctionReturn(0); 2067 } 2068 EXTERN_C_END 2069 2070 EXTERN_C_BEGIN 2071 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*); 2072 EXTERN_C_END 2073 2074 #undef __FUNCT__ 2075 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ" 2076 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 2077 { 2078 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)B->data; 2079 PetscInt m = B->rmap.n/bs,cstart = baij->cstartbs, cend = baij->cendbs,j,nnz,i,d; 2080 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart = baij->rstartbs,ii; 2081 const PetscInt *JJ; 2082 PetscScalar *values; 2083 PetscErrorCode ierr; 2084 2085 PetscFunctionBegin; 2086 #if defined(PETSC_OPT_g) 2087 if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_RANGE,"Ii[0] must be 0 it is %D",Ii[0]); 2088 #endif 2089 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2090 o_nnz = d_nnz + m; 2091 2092 for (i=0; i<m; i++) { 2093 nnz = Ii[i+1]- Ii[i]; 2094 JJ = J + Ii[i]; 2095 nnz_max = PetscMax(nnz_max,nnz); 2096 #if defined(PETSC_OPT_g) 2097 if (nnz < 0) SETERRQ1(PETSC_ERR_ARG_RANGE,"Local row %D has a negative %D number of columns",i,nnz); 2098 #endif 2099 for (j=0; j<nnz; j++) { 2100 if (*JJ >= cstart) break; 2101 JJ++; 2102 } 2103 d = 0; 2104 for (; j<nnz; j++) { 2105 if (*JJ++ >= cend) break; 2106 d++; 2107 } 2108 d_nnz[i] = d; 2109 o_nnz[i] = nnz - d; 2110 } 2111 ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2112 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2113 2114 if (v) values = (PetscScalar*)v; 2115 else { 2116 ierr = PetscMalloc(bs*bs*(nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2117 ierr = PetscMemzero(values,bs*bs*nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2118 } 2119 2120 ierr = MatSetOption(B,MAT_COLUMNS_SORTED,PETSC_TRUE);CHKERRQ(ierr); 2121 for (i=0; i<m; i++) { 2122 ii = i + rstart; 2123 nnz = Ii[i+1]- Ii[i]; 2124 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 2125 } 2126 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2127 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2128 ierr = MatSetOption(B,MAT_COLUMNS_SORTED,PETSC_FALSE);CHKERRQ(ierr); 2129 2130 if (!v) { 2131 ierr = PetscFree(values);CHKERRQ(ierr); 2132 } 2133 PetscFunctionReturn(0); 2134 } 2135 2136 #undef __FUNCT__ 2137 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR" 2138 /*@C 2139 MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 2140 (the default parallel PETSc format). 2141 2142 Collective on MPI_Comm 2143 2144 Input Parameters: 2145 + A - the matrix 2146 . i - the indices into j for the start of each local row (starts with zero) 2147 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2148 - v - optional values in the matrix 2149 2150 Level: developer 2151 2152 .keywords: matrix, aij, compressed row, sparse, parallel 2153 2154 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ 2155 @*/ 2156 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2157 { 2158 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]); 2159 2160 PetscFunctionBegin; 2161 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 2162 if (f) { 2163 ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr); 2164 } 2165 PetscFunctionReturn(0); 2166 } 2167 2168 EXTERN_C_BEGIN 2169 #undef __FUNCT__ 2170 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ" 2171 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz) 2172 { 2173 Mat_MPIBAIJ *b; 2174 PetscErrorCode ierr; 2175 PetscInt i; 2176 2177 PetscFunctionBegin; 2178 B->preallocated = PETSC_TRUE; 2179 ierr = PetscOptionsBegin(B->comm,B->prefix,"Options for MPIBAIJ matrix","Mat");CHKERRQ(ierr); 2180 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 2181 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2182 2183 if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive"); 2184 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2185 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2186 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 2187 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2188 2189 B->rmap.bs = bs; 2190 B->cmap.bs = bs; 2191 ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr); 2192 ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr); 2193 2194 if (d_nnz) { 2195 for (i=0; i<B->rmap.n/bs; i++) { 2196 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 2197 } 2198 } 2199 if (o_nnz) { 2200 for (i=0; i<B->rmap.n/bs; i++) { 2201 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 2202 } 2203 } 2204 2205 b = (Mat_MPIBAIJ*)B->data; 2206 b->bs2 = bs*bs; 2207 b->mbs = B->rmap.n/bs; 2208 b->nbs = B->cmap.n/bs; 2209 b->Mbs = B->rmap.N/bs; 2210 b->Nbs = B->cmap.N/bs; 2211 2212 for (i=0; i<=b->size; i++) { 2213 b->rangebs[i] = B->rmap.range[i]/bs; 2214 } 2215 b->rstartbs = B->rmap.rstart/bs; 2216 b->rendbs = B->rmap.rend/bs; 2217 b->cstartbs = B->cmap.rstart/bs; 2218 b->cendbs = B->cmap.rend/bs; 2219 2220 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2221 ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr); 2222 ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr); 2223 ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2224 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 2225 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2226 ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr); 2227 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 2228 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 2229 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 2230 2231 ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr); 2232 2233 PetscFunctionReturn(0); 2234 } 2235 EXTERN_C_END 2236 2237 EXTERN_C_BEGIN 2238 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 2239 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 2240 EXTERN_C_END 2241 2242 /*MC 2243 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 2244 2245 Options Database Keys: 2246 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 2247 . -mat_block_size <bs> - set the blocksize used to store the matrix 2248 - -mat_use_hash_table <fact> 2249 2250 Level: beginner 2251 2252 .seealso: MatCreateMPIBAIJ 2253 M*/ 2254 2255 EXTERN_C_BEGIN 2256 #undef __FUNCT__ 2257 #define __FUNCT__ "MatCreate_MPIBAIJ" 2258 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIBAIJ(Mat B) 2259 { 2260 Mat_MPIBAIJ *b; 2261 PetscErrorCode ierr; 2262 PetscTruth flg; 2263 2264 PetscFunctionBegin; 2265 ierr = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr); 2266 B->data = (void*)b; 2267 2268 2269 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2270 B->mapping = 0; 2271 B->factor = 0; 2272 B->assembled = PETSC_FALSE; 2273 2274 B->insertmode = NOT_SET_VALUES; 2275 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 2276 ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr); 2277 2278 /* build local table of row and column ownerships */ 2279 ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr); 2280 2281 /* build cache for off array entries formed */ 2282 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 2283 b->donotstash = PETSC_FALSE; 2284 b->colmap = PETSC_NULL; 2285 b->garray = PETSC_NULL; 2286 b->roworiented = PETSC_TRUE; 2287 2288 #if defined(PETSC_USE_MAT_SINGLE) 2289 /* stuff for MatSetValues_XXX in single precision */ 2290 b->setvalueslen = 0; 2291 b->setvaluescopy = PETSC_NULL; 2292 #endif 2293 2294 /* stuff used in block assembly */ 2295 b->barray = 0; 2296 2297 /* stuff used for matrix vector multiply */ 2298 b->lvec = 0; 2299 b->Mvctx = 0; 2300 2301 /* stuff for MatGetRow() */ 2302 b->rowindices = 0; 2303 b->rowvalues = 0; 2304 b->getrowactive = PETSC_FALSE; 2305 2306 /* hash table stuff */ 2307 b->ht = 0; 2308 b->hd = 0; 2309 b->ht_size = 0; 2310 b->ht_flag = PETSC_FALSE; 2311 b->ht_fact = 0; 2312 b->ht_total_ct = 0; 2313 b->ht_insert_ct = 0; 2314 2315 ierr = PetscOptionsBegin(B->comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr); 2316 ierr = PetscOptionsTruth("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 2317 if (flg) { 2318 PetscReal fact = 1.39; 2319 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 2320 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr); 2321 if (fact <= 1.0) fact = 1.39; 2322 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 2323 ierr = PetscInfo1(0,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 2324 } 2325 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2326 2327 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 2328 "MatStoreValues_MPIBAIJ", 2329 MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 2330 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 2331 "MatRetrieveValues_MPIBAIJ", 2332 MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 2333 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 2334 "MatGetDiagonalBlock_MPIBAIJ", 2335 MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 2336 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C", 2337 "MatMPIBAIJSetPreallocation_MPIBAIJ", 2338 MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr); 2339 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C", 2340 "MatMPIBAIJSetPreallocationCSR_MPIBAIJ", 2341 MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr); 2342 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 2343 "MatDiagonalScaleLocal_MPIBAIJ", 2344 MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr); 2345 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C", 2346 "MatSetHashTableFactor_MPIBAIJ", 2347 MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr); 2348 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr); 2349 PetscFunctionReturn(0); 2350 } 2351 EXTERN_C_END 2352 2353 /*MC 2354 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 2355 2356 This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator, 2357 and MATMPIBAIJ otherwise. 2358 2359 Options Database Keys: 2360 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions() 2361 2362 Level: beginner 2363 2364 .seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 2365 M*/ 2366 2367 EXTERN_C_BEGIN 2368 #undef __FUNCT__ 2369 #define __FUNCT__ "MatCreate_BAIJ" 2370 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_BAIJ(Mat A) 2371 { 2372 PetscErrorCode ierr; 2373 PetscMPIInt size; 2374 2375 PetscFunctionBegin; 2376 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 2377 if (size == 1) { 2378 ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr); 2379 } else { 2380 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 2381 } 2382 PetscFunctionReturn(0); 2383 } 2384 EXTERN_C_END 2385 2386 #undef __FUNCT__ 2387 #define __FUNCT__ "MatMPIBAIJSetPreallocation" 2388 /*@C 2389 MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format 2390 (block compressed row). For good matrix assembly performance 2391 the user should preallocate the matrix storage by setting the parameters 2392 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2393 performance can be increased by more than a factor of 50. 2394 2395 Collective on Mat 2396 2397 Input Parameters: 2398 + A - the matrix 2399 . bs - size of blockk 2400 . d_nz - number of block nonzeros per block row in diagonal portion of local 2401 submatrix (same for all local rows) 2402 . d_nnz - array containing the number of block nonzeros in the various block rows 2403 of the in diagonal portion of the local (possibly different for each block 2404 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 2405 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2406 submatrix (same for all local rows). 2407 - o_nnz - array containing the number of nonzeros in the various block rows of the 2408 off-diagonal portion of the local submatrix (possibly different for 2409 each block row) or PETSC_NULL. 2410 2411 If the *_nnz parameter is given then the *_nz parameter is ignored 2412 2413 Options Database Keys: 2414 + -mat_block_size - size of the blocks to use 2415 - -mat_use_hash_table <fact> 2416 2417 Notes: 2418 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2419 than it must be used on all processors that share the object for that argument. 2420 2421 Storage Information: 2422 For a square global matrix we define each processor's diagonal portion 2423 to be its local rows and the corresponding columns (a square submatrix); 2424 each processor's off-diagonal portion encompasses the remainder of the 2425 local matrix (a rectangular submatrix). 2426 2427 The user can specify preallocated storage for the diagonal part of 2428 the local submatrix with either d_nz or d_nnz (not both). Set 2429 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 2430 memory allocation. Likewise, specify preallocated storage for the 2431 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2432 2433 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2434 the figure below we depict these three local rows and all columns (0-11). 2435 2436 .vb 2437 0 1 2 3 4 5 6 7 8 9 10 11 2438 ------------------- 2439 row 3 | o o o d d d o o o o o o 2440 row 4 | o o o d d d o o o o o o 2441 row 5 | o o o d d d o o o o o o 2442 ------------------- 2443 .ve 2444 2445 Thus, any entries in the d locations are stored in the d (diagonal) 2446 submatrix, and any entries in the o locations are stored in the 2447 o (off-diagonal) submatrix. Note that the d and the o submatrices are 2448 stored simply in the MATSEQBAIJ format for compressed row storage. 2449 2450 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 2451 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2452 In general, for PDE problems in which most nonzeros are near the diagonal, 2453 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2454 or you will get TERRIBLE performance; see the users' manual chapter on 2455 matrices. 2456 2457 Level: intermediate 2458 2459 .keywords: matrix, block, aij, compressed row, sparse, parallel 2460 2461 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR() 2462 @*/ 2463 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2464 { 2465 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 2466 2467 PetscFunctionBegin; 2468 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2469 if (f) { 2470 ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2471 } 2472 PetscFunctionReturn(0); 2473 } 2474 2475 #undef __FUNCT__ 2476 #define __FUNCT__ "MatCreateMPIBAIJ" 2477 /*@C 2478 MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 2479 (block compressed row). For good matrix assembly performance 2480 the user should preallocate the matrix storage by setting the parameters 2481 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2482 performance can be increased by more than a factor of 50. 2483 2484 Collective on MPI_Comm 2485 2486 Input Parameters: 2487 + comm - MPI communicator 2488 . bs - size of blockk 2489 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2490 This value should be the same as the local size used in creating the 2491 y vector for the matrix-vector product y = Ax. 2492 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 2493 This value should be the same as the local size used in creating the 2494 x vector for the matrix-vector product y = Ax. 2495 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2496 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2497 . d_nz - number of nonzero blocks per block row in diagonal portion of local 2498 submatrix (same for all local rows) 2499 . d_nnz - array containing the number of nonzero blocks in the various block rows 2500 of the in diagonal portion of the local (possibly different for each block 2501 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 2502 . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local 2503 submatrix (same for all local rows). 2504 - o_nnz - array containing the number of nonzero blocks in the various block rows of the 2505 off-diagonal portion of the local submatrix (possibly different for 2506 each block row) or PETSC_NULL. 2507 2508 Output Parameter: 2509 . A - the matrix 2510 2511 Options Database Keys: 2512 + -mat_block_size - size of the blocks to use 2513 - -mat_use_hash_table <fact> 2514 2515 Notes: 2516 If the *_nnz parameter is given then the *_nz parameter is ignored 2517 2518 A nonzero block is any block that as 1 or more nonzeros in it 2519 2520 The user MUST specify either the local or global matrix dimensions 2521 (possibly both). 2522 2523 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2524 than it must be used on all processors that share the object for that argument. 2525 2526 Storage Information: 2527 For a square global matrix we define each processor's diagonal portion 2528 to be its local rows and the corresponding columns (a square submatrix); 2529 each processor's off-diagonal portion encompasses the remainder of the 2530 local matrix (a rectangular submatrix). 2531 2532 The user can specify preallocated storage for the diagonal part of 2533 the local submatrix with either d_nz or d_nnz (not both). Set 2534 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 2535 memory allocation. Likewise, specify preallocated storage for the 2536 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2537 2538 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2539 the figure below we depict these three local rows and all columns (0-11). 2540 2541 .vb 2542 0 1 2 3 4 5 6 7 8 9 10 11 2543 ------------------- 2544 row 3 | o o o d d d o o o o o o 2545 row 4 | o o o d d d o o o o o o 2546 row 5 | o o o d d d o o o o o o 2547 ------------------- 2548 .ve 2549 2550 Thus, any entries in the d locations are stored in the d (diagonal) 2551 submatrix, and any entries in the o locations are stored in the 2552 o (off-diagonal) submatrix. Note that the d and the o submatrices are 2553 stored simply in the MATSEQBAIJ format for compressed row storage. 2554 2555 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 2556 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2557 In general, for PDE problems in which most nonzeros are near the diagonal, 2558 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2559 or you will get TERRIBLE performance; see the users' manual chapter on 2560 matrices. 2561 2562 Level: intermediate 2563 2564 .keywords: matrix, block, aij, compressed row, sparse, parallel 2565 2566 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 2567 @*/ 2568 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2569 { 2570 PetscErrorCode ierr; 2571 PetscMPIInt size; 2572 2573 PetscFunctionBegin; 2574 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2575 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2576 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2577 if (size > 1) { 2578 ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr); 2579 ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2580 } else { 2581 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 2582 ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2583 } 2584 PetscFunctionReturn(0); 2585 } 2586 2587 #undef __FUNCT__ 2588 #define __FUNCT__ "MatDuplicate_MPIBAIJ" 2589 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2590 { 2591 Mat mat; 2592 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 2593 PetscErrorCode ierr; 2594 PetscInt len=0; 2595 2596 PetscFunctionBegin; 2597 *newmat = 0; 2598 ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr); 2599 ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr); 2600 ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 2601 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2602 2603 mat->factor = matin->factor; 2604 mat->preallocated = PETSC_TRUE; 2605 mat->assembled = PETSC_TRUE; 2606 mat->insertmode = NOT_SET_VALUES; 2607 2608 a = (Mat_MPIBAIJ*)mat->data; 2609 mat->rmap.bs = matin->rmap.bs; 2610 a->bs2 = oldmat->bs2; 2611 a->mbs = oldmat->mbs; 2612 a->nbs = oldmat->nbs; 2613 a->Mbs = oldmat->Mbs; 2614 a->Nbs = oldmat->Nbs; 2615 2616 ierr = PetscMapCopy(matin->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr); 2617 ierr = PetscMapCopy(matin->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr); 2618 2619 a->size = oldmat->size; 2620 a->rank = oldmat->rank; 2621 a->donotstash = oldmat->donotstash; 2622 a->roworiented = oldmat->roworiented; 2623 a->rowindices = 0; 2624 a->rowvalues = 0; 2625 a->getrowactive = PETSC_FALSE; 2626 a->barray = 0; 2627 a->rstartbs = oldmat->rstartbs; 2628 a->rendbs = oldmat->rendbs; 2629 a->cstartbs = oldmat->cstartbs; 2630 a->cendbs = oldmat->cendbs; 2631 2632 /* hash table stuff */ 2633 a->ht = 0; 2634 a->hd = 0; 2635 a->ht_size = 0; 2636 a->ht_flag = oldmat->ht_flag; 2637 a->ht_fact = oldmat->ht_fact; 2638 a->ht_total_ct = 0; 2639 a->ht_insert_ct = 0; 2640 2641 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr); 2642 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 2643 ierr = MatStashCreate_Private(matin->comm,matin->rmap.bs,&mat->bstash);CHKERRQ(ierr); 2644 if (oldmat->colmap) { 2645 #if defined (PETSC_USE_CTABLE) 2646 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2647 #else 2648 ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2649 ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2650 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2651 #endif 2652 } else a->colmap = 0; 2653 2654 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2655 ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2656 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2657 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 2658 } else a->garray = 0; 2659 2660 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2661 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2662 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2663 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2664 2665 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2666 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2667 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2668 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2669 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 2670 *newmat = mat; 2671 2672 PetscFunctionReturn(0); 2673 } 2674 2675 #include "petscsys.h" 2676 2677 #undef __FUNCT__ 2678 #define __FUNCT__ "MatLoad_MPIBAIJ" 2679 PetscErrorCode MatLoad_MPIBAIJ(PetscViewer viewer, MatType type,Mat *newmat) 2680 { 2681 Mat A; 2682 PetscErrorCode ierr; 2683 int fd; 2684 PetscInt i,nz,j,rstart,rend; 2685 PetscScalar *vals,*buf; 2686 MPI_Comm comm = ((PetscObject)viewer)->comm; 2687 MPI_Status status; 2688 PetscMPIInt rank,size,maxnz; 2689 PetscInt header[4],*rowlengths = 0,M,N,m,*rowners,*cols; 2690 PetscInt *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL; 2691 PetscInt jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax; 2692 PetscMPIInt tag = ((PetscObject)viewer)->tag; 2693 PetscInt *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount; 2694 PetscInt dcount,kmax,k,nzcount,tmp,mend; 2695 2696 PetscFunctionBegin; 2697 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr); 2698 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 2699 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2700 2701 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2702 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2703 if (!rank) { 2704 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2705 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2706 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2707 } 2708 2709 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2710 M = header[1]; N = header[2]; 2711 2712 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2713 2714 /* 2715 This code adds extra rows to make sure the number of rows is 2716 divisible by the blocksize 2717 */ 2718 Mbs = M/bs; 2719 extra_rows = bs - M + bs*Mbs; 2720 if (extra_rows == bs) extra_rows = 0; 2721 else Mbs++; 2722 if (extra_rows && !rank) { 2723 ierr = PetscInfo(0,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 2724 } 2725 2726 /* determine ownership of all rows */ 2727 mbs = Mbs/size + ((Mbs % size) > rank); 2728 m = mbs*bs; 2729 ierr = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr); 2730 ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2731 2732 /* process 0 needs enough room for process with most rows */ 2733 if (!rank) { 2734 mmax = rowners[1]; 2735 for (i=2; i<size; i++) { 2736 mmax = PetscMax(mmax,rowners[i]); 2737 } 2738 mmax*=bs; 2739 } else mmax = m; 2740 2741 rowners[0] = 0; 2742 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2743 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2744 rstart = rowners[rank]; 2745 rend = rowners[rank+1]; 2746 2747 /* distribute row lengths to all processors */ 2748 ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr); 2749 if (!rank) { 2750 mend = m; 2751 if (size == 1) mend = mend - extra_rows; 2752 ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr); 2753 for (j=mend; j<m; j++) locrowlens[j] = 1; 2754 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2755 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2756 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2757 for (j=0; j<m; j++) { 2758 procsnz[0] += locrowlens[j]; 2759 } 2760 for (i=1; i<size; i++) { 2761 mend = browners[i+1] - browners[i]; 2762 if (i == size-1) mend = mend - extra_rows; 2763 ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr); 2764 for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1; 2765 /* calculate the number of nonzeros on each processor */ 2766 for (j=0; j<browners[i+1]-browners[i]; j++) { 2767 procsnz[i] += rowlengths[j]; 2768 } 2769 ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2770 } 2771 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2772 } else { 2773 ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2774 } 2775 2776 if (!rank) { 2777 /* determine max buffer needed and allocate it */ 2778 maxnz = procsnz[0]; 2779 for (i=1; i<size; i++) { 2780 maxnz = PetscMax(maxnz,procsnz[i]); 2781 } 2782 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2783 2784 /* read in my part of the matrix column indices */ 2785 nz = procsnz[0]; 2786 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 2787 mycols = ibuf; 2788 if (size == 1) nz -= extra_rows; 2789 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2790 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2791 2792 /* read in every ones (except the last) and ship off */ 2793 for (i=1; i<size-1; i++) { 2794 nz = procsnz[i]; 2795 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2796 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2797 } 2798 /* read in the stuff for the last proc */ 2799 if (size != 1) { 2800 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2801 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2802 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2803 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 2804 } 2805 ierr = PetscFree(cols);CHKERRQ(ierr); 2806 } else { 2807 /* determine buffer space needed for message */ 2808 nz = 0; 2809 for (i=0; i<m; i++) { 2810 nz += locrowlens[i]; 2811 } 2812 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 2813 mycols = ibuf; 2814 /* receive message of column indices*/ 2815 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2816 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2817 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2818 } 2819 2820 /* loop over local rows, determining number of off diagonal entries */ 2821 ierr = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr); 2822 ierr = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr); 2823 ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2824 ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2825 ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2826 rowcount = 0; nzcount = 0; 2827 for (i=0; i<mbs; i++) { 2828 dcount = 0; 2829 odcount = 0; 2830 for (j=0; j<bs; j++) { 2831 kmax = locrowlens[rowcount]; 2832 for (k=0; k<kmax; k++) { 2833 tmp = mycols[nzcount++]/bs; 2834 if (!mask[tmp]) { 2835 mask[tmp] = 1; 2836 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; 2837 else masked1[dcount++] = tmp; 2838 } 2839 } 2840 rowcount++; 2841 } 2842 2843 dlens[i] = dcount; 2844 odlens[i] = odcount; 2845 2846 /* zero out the mask elements we set */ 2847 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2848 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2849 } 2850 2851 /* create our matrix */ 2852 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 2853 ierr = MatSetSizes(A,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 2854 ierr = MatSetType(A,type);CHKERRQ(ierr) 2855 ierr = MatMPIBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr); 2856 2857 /* Why doesn't this called using ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); */ 2858 ierr = MatSetOption(A,MAT_COLUMNS_SORTED,PETSC_TRUE);CHKERRQ(ierr); 2859 2860 if (!rank) { 2861 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2862 /* read in my part of the matrix numerical values */ 2863 nz = procsnz[0]; 2864 vals = buf; 2865 mycols = ibuf; 2866 if (size == 1) nz -= extra_rows; 2867 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2868 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2869 2870 /* insert into matrix */ 2871 jj = rstart*bs; 2872 for (i=0; i<m; i++) { 2873 ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2874 mycols += locrowlens[i]; 2875 vals += locrowlens[i]; 2876 jj++; 2877 } 2878 /* read in other processors (except the last one) and ship out */ 2879 for (i=1; i<size-1; i++) { 2880 nz = procsnz[i]; 2881 vals = buf; 2882 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2883 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2884 } 2885 /* the last proc */ 2886 if (size != 1){ 2887 nz = procsnz[i] - extra_rows; 2888 vals = buf; 2889 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2890 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2891 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 2892 } 2893 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2894 } else { 2895 /* receive numeric values */ 2896 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2897 2898 /* receive message of values*/ 2899 vals = buf; 2900 mycols = ibuf; 2901 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2902 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2903 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2904 2905 /* insert into matrix */ 2906 jj = rstart*bs; 2907 for (i=0; i<m; i++) { 2908 ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2909 mycols += locrowlens[i]; 2910 vals += locrowlens[i]; 2911 jj++; 2912 } 2913 } 2914 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2915 ierr = PetscFree(buf);CHKERRQ(ierr); 2916 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2917 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 2918 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 2919 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 2920 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2921 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2922 2923 *newmat = A; 2924 PetscFunctionReturn(0); 2925 } 2926 2927 #undef __FUNCT__ 2928 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor" 2929 /*@ 2930 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2931 2932 Input Parameters: 2933 . mat - the matrix 2934 . fact - factor 2935 2936 Collective on Mat 2937 2938 Level: advanced 2939 2940 Notes: 2941 This can also be set by the command line option: -mat_use_hash_table <fact> 2942 2943 .keywords: matrix, hashtable, factor, HT 2944 2945 .seealso: MatSetOption() 2946 @*/ 2947 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact) 2948 { 2949 PetscErrorCode ierr,(*f)(Mat,PetscReal); 2950 2951 PetscFunctionBegin; 2952 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr); 2953 if (f) { 2954 ierr = (*f)(mat,fact);CHKERRQ(ierr); 2955 } 2956 PetscFunctionReturn(0); 2957 } 2958 2959 EXTERN_C_BEGIN 2960 #undef __FUNCT__ 2961 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ" 2962 PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact) 2963 { 2964 Mat_MPIBAIJ *baij; 2965 2966 PetscFunctionBegin; 2967 baij = (Mat_MPIBAIJ*)mat->data; 2968 baij->ht_fact = fact; 2969 PetscFunctionReturn(0); 2970 } 2971 EXTERN_C_END 2972 2973 #undef __FUNCT__ 2974 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ" 2975 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 2976 { 2977 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2978 PetscFunctionBegin; 2979 *Ad = a->A; 2980 *Ao = a->B; 2981 *colmap = a->garray; 2982 PetscFunctionReturn(0); 2983 } 2984