1 2 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/ 3 #include <petscblaslapack.h> 4 #include <petscsf.h> 5 6 extern PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat); 7 extern PetscErrorCode MatDisAssemble_MPIBAIJ(Mat); 8 extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []); 9 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode); 10 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 11 extern PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]); 12 extern PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]); 13 extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar,Vec,Vec); 14 15 #undef __FUNCT__ 16 #define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ" 17 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[]) 18 { 19 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 20 PetscErrorCode ierr; 21 PetscInt i,*idxb = 0; 22 PetscScalar *va,*vb; 23 Vec vtmp; 24 25 PetscFunctionBegin; 26 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 27 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 28 if (idx) { 29 for (i=0; i<A->rmap->n; i++) { 30 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 31 } 32 } 33 34 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 35 if (idx) {ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);} 36 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 37 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 38 39 for (i=0; i<A->rmap->n; i++) { 40 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 41 va[i] = vb[i]; 42 if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs); 43 } 44 } 45 46 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 47 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 48 ierr = PetscFree(idxb);CHKERRQ(ierr); 49 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 50 PetscFunctionReturn(0); 51 } 52 53 #undef __FUNCT__ 54 #define __FUNCT__ "MatStoreValues_MPIBAIJ" 55 PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat) 56 { 57 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)mat->data; 58 PetscErrorCode ierr; 59 60 PetscFunctionBegin; 61 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 62 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 63 PetscFunctionReturn(0); 64 } 65 66 #undef __FUNCT__ 67 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ" 68 PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat) 69 { 70 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)mat->data; 71 PetscErrorCode ierr; 72 73 PetscFunctionBegin; 74 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 75 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 76 PetscFunctionReturn(0); 77 } 78 79 /* 80 Local utility routine that creates a mapping from the global column 81 number to the local number in the off-diagonal part of the local 82 storage of the matrix. This is done in a non scalable way since the 83 length of colmap equals the global matrix length. 
84 */ 85 #undef __FUNCT__ 86 #define __FUNCT__ "MatCreateColmap_MPIBAIJ_Private" 87 PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat) 88 { 89 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 90 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 91 PetscErrorCode ierr; 92 PetscInt nbs = B->nbs,i,bs=mat->rmap->bs; 93 94 PetscFunctionBegin; 95 #if defined(PETSC_USE_CTABLE) 96 ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr); 97 for (i=0; i<nbs; i++) { 98 ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr); 99 } 100 #else 101 ierr = PetscMalloc1((baij->Nbs+1),&baij->colmap);CHKERRQ(ierr); 102 ierr = PetscLogObjectMemory((PetscObject)mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 103 ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 104 for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1; 105 #endif 106 PetscFunctionReturn(0); 107 } 108 109 #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 110 { \ 111 \ 112 brow = row/bs; \ 113 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 114 rmax = aimax[brow]; nrow = ailen[brow]; \ 115 bcol = col/bs; \ 116 ridx = row % bs; cidx = col % bs; \ 117 low = 0; high = nrow; \ 118 while (high-low > 3) { \ 119 t = (low+high)/2; \ 120 if (rp[t] > bcol) high = t; \ 121 else low = t; \ 122 } \ 123 for (_i=low; _i<high; _i++) { \ 124 if (rp[_i] > bcol) break; \ 125 if (rp[_i] == bcol) { \ 126 bap = ap + bs2*_i + bs*cidx + ridx; \ 127 if (addv == ADD_VALUES) *bap += value; \ 128 else *bap = value; \ 129 goto a_noinsert; \ 130 } \ 131 } \ 132 if (a->nonew == 1) goto a_noinsert; \ 133 if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 134 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 135 N = nrow++ - 1; \ 136 /* shift up all the later entries in this row */ \ 137 for (ii=N; ii>=_i; ii--) { \ 138 rp[ii+1] = rp[ii]; \ 139 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 140 } \ 141 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 142 rp[_i] = bcol; \ 143 ap[bs2*_i + bs*cidx + ridx] = value; \ 144 a_noinsert:; \ 145 ailen[brow] = nrow; \ 146 } 147 148 #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 149 { \ 150 brow = row/bs; \ 151 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 152 rmax = bimax[brow]; nrow = bilen[brow]; \ 153 bcol = col/bs; \ 154 ridx = row % bs; cidx = col % bs; \ 155 low = 0; high = nrow; \ 156 while (high-low > 3) { \ 157 t = (low+high)/2; \ 158 if (rp[t] > bcol) high = t; \ 159 else low = t; \ 160 } \ 161 for (_i=low; _i<high; _i++) { \ 162 if (rp[_i] > bcol) break; \ 163 if (rp[_i] == bcol) { \ 164 bap = ap + bs2*_i + bs*cidx + ridx; \ 165 if (addv == ADD_VALUES) *bap += value; \ 166 else *bap = value; \ 167 goto b_noinsert; \ 168 } \ 169 } \ 170 if (b->nonew == 1) goto b_noinsert; \ 171 if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 172 MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 173 N = nrow++ - 1; \ 174 /* shift up all the later entries in this row */ \ 175 for (ii=N; ii>=_i; ii--) { \ 176 rp[ii+1] = rp[ii]; \ 177 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 178 } \ 179 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 180 
rp[_i] = bcol; \ 181 ap[bs2*_i + bs*cidx + ridx] = value; \ 182 b_noinsert:; \ 183 bilen[brow] = nrow; \ 184 } 185 186 #undef __FUNCT__ 187 #define __FUNCT__ "MatSetValues_MPIBAIJ" 188 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 189 { 190 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 191 MatScalar value; 192 PetscBool roworiented = baij->roworiented; 193 PetscErrorCode ierr; 194 PetscInt i,j,row,col; 195 PetscInt rstart_orig=mat->rmap->rstart; 196 PetscInt rend_orig =mat->rmap->rend,cstart_orig=mat->cmap->rstart; 197 PetscInt cend_orig =mat->cmap->rend,bs=mat->rmap->bs; 198 199 /* Some Variables required in the macro */ 200 Mat A = baij->A; 201 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data; 202 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 203 MatScalar *aa =a->a; 204 205 Mat B = baij->B; 206 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 207 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 208 MatScalar *ba =b->a; 209 210 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 211 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 212 MatScalar *ap,*bap; 213 214 PetscFunctionBegin; 215 if (v) PetscValidScalarPointer(v,6); 216 for (i=0; i<m; i++) { 217 if (im[i] < 0) continue; 218 #if defined(PETSC_USE_DEBUG) 219 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 220 #endif 221 if (im[i] >= rstart_orig && im[i] < rend_orig) { 222 row = im[i] - rstart_orig; 223 for (j=0; j<n; j++) { 224 if (in[j] >= cstart_orig && in[j] < cend_orig) { 225 col = in[j] - cstart_orig; 226 if (roworiented) value = v[i*n+j]; 227 else value = v[i+j*m]; 228 MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 229 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 230 } else if (in[j] < 0) continue; 231 #if defined(PETSC_USE_DEBUG) 232 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 233 #endif 234 else { 235 if (mat->was_assembled) { 236 if (!baij->colmap) { 237 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 238 } 239 #if defined(PETSC_USE_CTABLE) 240 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 241 col = col - 1; 242 #else 243 col = baij->colmap[in[j]/bs] - 1; 244 #endif 245 if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) { 246 ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 247 col = in[j]; 248 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 249 B = baij->B; 250 b = (Mat_SeqBAIJ*)(B)->data; 251 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 252 ba =b->a; 253 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]); 254 else col += in[j]%bs; 255 } else col = in[j]; 256 if (roworiented) value = v[i*n+j]; 257 else value = v[i+j*m]; 258 MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 259 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 260 } 261 } 262 } else { 263 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 264 if (!baij->donotstash) { 265 mat->assembled = PETSC_FALSE; 266 if (roworiented) { 267 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 268 } else { 269 ierr = 
MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 270 } 271 } 272 } 273 } 274 PetscFunctionReturn(0); 275 } 276 277 #undef __FUNCT__ 278 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ" 279 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 280 { 281 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 282 const PetscScalar *value; 283 MatScalar *barray = baij->barray; 284 PetscBool roworiented = baij->roworiented; 285 PetscErrorCode ierr; 286 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 287 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 288 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 289 290 PetscFunctionBegin; 291 if (!barray) { 292 ierr = PetscMalloc1(bs2,&barray);CHKERRQ(ierr); 293 baij->barray = barray; 294 } 295 296 if (roworiented) stepval = (n-1)*bs; 297 else stepval = (m-1)*bs; 298 299 for (i=0; i<m; i++) { 300 if (im[i] < 0) continue; 301 #if defined(PETSC_USE_DEBUG) 302 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 303 #endif 304 if (im[i] >= rstart && im[i] < rend) { 305 row = im[i] - rstart; 306 for (j=0; j<n; j++) { 307 /* If NumCol = 1 then a copy is not required */ 308 if ((roworiented) && (n == 1)) { 309 barray = (MatScalar*)v + i*bs2; 310 } else if ((!roworiented) && (m == 1)) { 311 barray = (MatScalar*)v + j*bs2; 312 } else { /* Here a copy is required */ 313 if (roworiented) { 314 value = v + (i*(stepval+bs) + j)*bs; 315 } else { 316 value = v + (j*(stepval+bs) + i)*bs; 317 } 318 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 319 for (jj=0; jj<bs; jj++) barray[jj] = value[jj]; 320 barray += bs; 321 } 322 barray -= bs2; 323 } 324 325 if (in[j] >= cstart && in[j] < cend) { 326 col = in[j] - cstart; 327 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 328 } else if (in[j] < 0) continue; 329 #if defined(PETSC_USE_DEBUG) 330 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 331 #endif 332 else { 333 if (mat->was_assembled) { 334 if (!baij->colmap) { 335 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 336 } 337 338 #if defined(PETSC_USE_DEBUG) 339 #if defined(PETSC_USE_CTABLE) 340 { PetscInt data; 341 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 342 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 343 } 344 #else 345 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 346 #endif 347 #endif 348 #if defined(PETSC_USE_CTABLE) 349 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 350 col = (col - 1)/bs; 351 #else 352 col = (baij->colmap[in[j]] - 1)/bs; 353 #endif 354 if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) { 355 ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 356 col = in[j]; 357 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", bs*im[i], bs*in[j]); 358 } else col = in[j]; 359 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 360 } 361 } 362 } else { 363 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 364 if (!baij->donotstash) { 365 if (roworiented) { 366 ierr = 
MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 367 } else { 368 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 369 } 370 } 371 } 372 } 373 PetscFunctionReturn(0); 374 } 375 376 #define HASH_KEY 0.6180339887 377 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp))) 378 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 379 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 380 #undef __FUNCT__ 381 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT" 382 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 383 { 384 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 385 PetscBool roworiented = baij->roworiented; 386 PetscErrorCode ierr; 387 PetscInt i,j,row,col; 388 PetscInt rstart_orig=mat->rmap->rstart; 389 PetscInt rend_orig =mat->rmap->rend,Nbs=baij->Nbs; 390 PetscInt h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx; 391 PetscReal tmp; 392 MatScalar **HD = baij->hd,value; 393 #if defined(PETSC_USE_DEBUG) 394 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 395 #endif 396 397 PetscFunctionBegin; 398 if (v) PetscValidScalarPointer(v,6); 399 for (i=0; i<m; i++) { 400 #if defined(PETSC_USE_DEBUG) 401 if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 402 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 403 #endif 404 row = im[i]; 405 if (row >= rstart_orig && row < rend_orig) { 406 for (j=0; j<n; j++) { 407 col = in[j]; 408 if (roworiented) value = v[i*n+j]; 409 else value = v[i+j*m]; 410 /* Look up PetscInto the Hash Table */ 411 key = (row/bs)*Nbs+(col/bs)+1; 412 h1 = HASH(size,key,tmp); 413 414 415 idx = h1; 416 #if defined(PETSC_USE_DEBUG) 417 insert_ct++; 418 total_ct++; 419 if (HT[idx] != key) { 420 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ; 421 if (idx == size) { 422 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ; 423 if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 424 } 425 } 426 #else 427 if (HT[idx] != key) { 428 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ; 429 if (idx == size) { 430 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ; 431 if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 432 } 433 } 434 #endif 435 /* A HASH table entry is found, so insert the values at the correct address */ 436 if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 437 else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 438 } 439 } else if (!baij->donotstash) { 440 if (roworiented) { 441 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 442 } else { 443 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 444 } 445 } 446 } 447 #if defined(PETSC_USE_DEBUG) 448 baij->ht_total_ct = total_ct; 449 baij->ht_insert_ct = insert_ct; 450 #endif 451 PetscFunctionReturn(0); 452 } 453 454 #undef __FUNCT__ 455 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT" 456 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 457 { 458 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 
459 PetscBool roworiented = baij->roworiented; 460 PetscErrorCode ierr; 461 PetscInt i,j,ii,jj,row,col; 462 PetscInt rstart=baij->rstartbs; 463 PetscInt rend =mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2; 464 PetscInt h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 465 PetscReal tmp; 466 MatScalar **HD = baij->hd,*baij_a; 467 const PetscScalar *v_t,*value; 468 #if defined(PETSC_USE_DEBUG) 469 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 470 #endif 471 472 PetscFunctionBegin; 473 if (roworiented) stepval = (n-1)*bs; 474 else stepval = (m-1)*bs; 475 476 for (i=0; i<m; i++) { 477 #if defined(PETSC_USE_DEBUG) 478 if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]); 479 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1); 480 #endif 481 row = im[i]; 482 v_t = v + i*nbs2; 483 if (row >= rstart && row < rend) { 484 for (j=0; j<n; j++) { 485 col = in[j]; 486 487 /* Look up into the Hash Table */ 488 key = row*Nbs+col+1; 489 h1 = HASH(size,key,tmp); 490 491 idx = h1; 492 #if defined(PETSC_USE_DEBUG) 493 total_ct++; 494 insert_ct++; 495 if (HT[idx] != key) { 496 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ; 497 if (idx == size) { 498 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ; 499 if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 500 } 501 } 502 #else 503 if (HT[idx] != key) { 504 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ; 505 if (idx == size) { 506 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ; 507 if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 508 } 509 } 510 #endif 511 baij_a = HD[idx]; 512 if (roworiented) { 513 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 514 /* value = v + (i*(stepval+bs)+j)*bs; */ 515 value = v_t; 516 v_t += bs; 517 if (addv == ADD_VALUES) { 518 for (ii=0; ii<bs; ii++,value+=stepval) { 519 for (jj=ii; jj<bs2; jj+=bs) { 520 baij_a[jj] += *value++; 521 } 522 } 523 } else { 524 for (ii=0; ii<bs; ii++,value+=stepval) { 525 for (jj=ii; jj<bs2; jj+=bs) { 526 baij_a[jj] = *value++; 527 } 528 } 529 } 530 } else { 531 value = v + j*(stepval+bs)*bs + i*bs; 532 if (addv == ADD_VALUES) { 533 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 534 for (jj=0; jj<bs; jj++) { 535 baij_a[jj] += *value++; 536 } 537 } 538 } else { 539 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 540 for (jj=0; jj<bs; jj++) { 541 baij_a[jj] = *value++; 542 } 543 } 544 } 545 } 546 } 547 } else { 548 if (!baij->donotstash) { 549 if (roworiented) { 550 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 551 } else { 552 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 553 } 554 } 555 } 556 } 557 #if defined(PETSC_USE_DEBUG) 558 baij->ht_total_ct = total_ct; 559 baij->ht_insert_ct = insert_ct; 560 #endif 561 PetscFunctionReturn(0); 562 } 563 564 #undef __FUNCT__ 565 #define __FUNCT__ "MatGetValues_MPIBAIJ" 566 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 567 { 568 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 569 PetscErrorCode ierr; 570 PetscInt bs = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend; 571 PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data; 572 573 
PetscFunctionBegin; 574 for (i=0; i<m; i++) { 575 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 576 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 577 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 578 row = idxm[i] - bsrstart; 579 for (j=0; j<n; j++) { 580 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 581 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 582 if (idxn[j] >= bscstart && idxn[j] < bscend) { 583 col = idxn[j] - bscstart; 584 ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 585 } else { 586 if (!baij->colmap) { 587 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 588 } 589 #if defined(PETSC_USE_CTABLE) 590 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 591 data--; 592 #else 593 data = baij->colmap[idxn[j]/bs]-1; 594 #endif 595 if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 596 else { 597 col = data + idxn[j]%bs; 598 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 599 } 600 } 601 } 602 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 603 } 604 PetscFunctionReturn(0); 605 } 606 607 #undef __FUNCT__ 608 #define __FUNCT__ "MatNorm_MPIBAIJ" 609 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm) 610 { 611 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 612 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data; 613 PetscErrorCode ierr; 614 PetscInt i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col; 615 PetscReal sum = 0.0; 616 MatScalar *v; 617 618 PetscFunctionBegin; 619 if (baij->size == 1) { 620 ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr); 621 } else { 622 if (type == NORM_FROBENIUS) { 623 v = amat->a; 624 nz = amat->nz*bs2; 625 for (i=0; i<nz; i++) { 626 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 627 } 628 v = bmat->a; 629 nz = bmat->nz*bs2; 630 for (i=0; i<nz; i++) { 631 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 632 } 633 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 634 *nrm = PetscSqrtReal(*nrm); 635 } else if (type == NORM_1) { /* max column sum */ 636 PetscReal *tmp,*tmp2; 637 PetscInt *jj,*garray=baij->garray,cstart=baij->rstartbs; 638 ierr = PetscMalloc2(mat->cmap->N,&tmp,mat->cmap->N,&tmp2);CHKERRQ(ierr); 639 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 640 v = amat->a; jj = amat->j; 641 for (i=0; i<amat->nz; i++) { 642 for (j=0; j<bs; j++) { 643 col = bs*(cstart + *jj) + j; /* column index */ 644 for (row=0; row<bs; row++) { 645 tmp[col] += PetscAbsScalar(*v); v++; 646 } 647 } 648 jj++; 649 } 650 v = bmat->a; jj = bmat->j; 651 for (i=0; i<bmat->nz; i++) { 652 for (j=0; j<bs; j++) { 653 col = bs*garray[*jj] + j; 654 for (row=0; row<bs; row++) { 655 tmp[col] += PetscAbsScalar(*v); v++; 656 } 657 } 658 jj++; 659 } 660 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 661 *nrm = 0.0; 662 for (j=0; j<mat->cmap->N; j++) { 663 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 664 } 665 ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr); 666 } else if (type == NORM_INFINITY) { /* max row sum */ 667 PetscReal *sums; 668 ierr = 
PetscMalloc1(bs,&sums);CHKERRQ(ierr); 669 sum = 0.0; 670 for (j=0; j<amat->mbs; j++) { 671 for (row=0; row<bs; row++) sums[row] = 0.0; 672 v = amat->a + bs2*amat->i[j]; 673 nz = amat->i[j+1]-amat->i[j]; 674 for (i=0; i<nz; i++) { 675 for (col=0; col<bs; col++) { 676 for (row=0; row<bs; row++) { 677 sums[row] += PetscAbsScalar(*v); v++; 678 } 679 } 680 } 681 v = bmat->a + bs2*bmat->i[j]; 682 nz = bmat->i[j+1]-bmat->i[j]; 683 for (i=0; i<nz; i++) { 684 for (col=0; col<bs; col++) { 685 for (row=0; row<bs; row++) { 686 sums[row] += PetscAbsScalar(*v); v++; 687 } 688 } 689 } 690 for (row=0; row<bs; row++) { 691 if (sums[row] > sum) sum = sums[row]; 692 } 693 } 694 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 695 ierr = PetscFree(sums);CHKERRQ(ierr); 696 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for this norm yet"); 697 } 698 PetscFunctionReturn(0); 699 } 700 701 /* 702 Creates the hash table, and sets the table 703 This table is created only once. 704 If new entried need to be added to the matrix 705 then the hash table has to be destroyed and 706 recreated. 707 */ 708 #undef __FUNCT__ 709 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private" 710 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor) 711 { 712 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 713 Mat A = baij->A,B=baij->B; 714 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)B->data; 715 PetscInt i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 716 PetscErrorCode ierr; 717 PetscInt ht_size,bs2=baij->bs2,rstart=baij->rstartbs; 718 PetscInt cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs; 719 PetscInt *HT,key; 720 MatScalar **HD; 721 PetscReal tmp; 722 #if defined(PETSC_USE_INFO) 723 PetscInt ct=0,max=0; 724 #endif 725 726 PetscFunctionBegin; 727 if (baij->ht) PetscFunctionReturn(0); 728 729 baij->ht_size = (PetscInt)(factor*nz); 730 ht_size = baij->ht_size; 731 732 /* Allocate Memory for Hash Table */ 733 ierr = PetscCalloc2(ht_size,&baij->hd,ht_size,&baij->ht);CHKERRQ(ierr); 734 HD = baij->hd; 735 HT = baij->ht; 736 737 /* Loop Over A */ 738 for (i=0; i<a->mbs; i++) { 739 for (j=ai[i]; j<ai[i+1]; j++) { 740 row = i+rstart; 741 col = aj[j]+cstart; 742 743 key = row*Nbs + col + 1; 744 h1 = HASH(ht_size,key,tmp); 745 for (k=0; k<ht_size; k++) { 746 if (!HT[(h1+k)%ht_size]) { 747 HT[(h1+k)%ht_size] = key; 748 HD[(h1+k)%ht_size] = a->a + j*bs2; 749 break; 750 #if defined(PETSC_USE_INFO) 751 } else { 752 ct++; 753 #endif 754 } 755 } 756 #if defined(PETSC_USE_INFO) 757 if (k> max) max = k; 758 #endif 759 } 760 } 761 /* Loop Over B */ 762 for (i=0; i<b->mbs; i++) { 763 for (j=bi[i]; j<bi[i+1]; j++) { 764 row = i+rstart; 765 col = garray[bj[j]]; 766 key = row*Nbs + col + 1; 767 h1 = HASH(ht_size,key,tmp); 768 for (k=0; k<ht_size; k++) { 769 if (!HT[(h1+k)%ht_size]) { 770 HT[(h1+k)%ht_size] = key; 771 HD[(h1+k)%ht_size] = b->a + j*bs2; 772 break; 773 #if defined(PETSC_USE_INFO) 774 } else { 775 ct++; 776 #endif 777 } 778 } 779 #if defined(PETSC_USE_INFO) 780 if (k> max) max = k; 781 #endif 782 } 783 } 784 785 /* Print Summary */ 786 #if defined(PETSC_USE_INFO) 787 for (i=0,j=0; i<ht_size; i++) { 788 if (HT[i]) j++; 789 } 790 ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 
0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr); 791 #endif 792 PetscFunctionReturn(0); 793 } 794 795 #undef __FUNCT__ 796 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ" 797 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 798 { 799 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 800 PetscErrorCode ierr; 801 PetscInt nstash,reallocs; 802 InsertMode addv; 803 804 PetscFunctionBegin; 805 if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 806 807 /* make sure all processors are either in INSERTMODE or ADDMODE */ 808 ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 809 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 810 mat->insertmode = addv; /* in case this processor had no cache */ 811 812 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 813 ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr); 814 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 815 ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 816 ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr); 817 ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 818 PetscFunctionReturn(0); 819 } 820 821 #undef __FUNCT__ 822 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ" 823 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 824 { 825 Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data; 826 Mat_SeqBAIJ *a =(Mat_SeqBAIJ*)baij->A->data; 827 PetscErrorCode ierr; 828 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 829 PetscInt *row,*col; 830 PetscBool r1,r2,r3,other_disassembled; 831 MatScalar *val; 832 InsertMode addv = mat->insertmode; 833 PetscMPIInt n; 834 835 PetscFunctionBegin; 836 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 837 if (!baij->donotstash && !mat->nooffprocentries) { 838 while (1) { 839 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 840 if (!flg) break; 841 842 for (i=0; i<n;) { 843 /* Now identify the consecutive vals belonging to the same row */ 844 for (j=i,rstart=row[j]; j<n; j++) { 845 if (row[j] != rstart) break; 846 } 847 if (j < n) ncols = j-i; 848 else ncols = n-i; 849 /* Now assemble all these values with a single function call */ 850 ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 851 i = j; 852 } 853 } 854 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 855 /* Now process the block-stash. 
Since the values are stashed column-oriented, 856 set the roworiented flag to column oriented, and after MatSetValues() 857 restore the original flags */ 858 r1 = baij->roworiented; 859 r2 = a->roworiented; 860 r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented; 861 862 baij->roworiented = PETSC_FALSE; 863 a->roworiented = PETSC_FALSE; 864 865 (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */ 866 while (1) { 867 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 868 if (!flg) break; 869 870 for (i=0; i<n;) { 871 /* Now identify the consecutive vals belonging to the same row */ 872 for (j=i,rstart=row[j]; j<n; j++) { 873 if (row[j] != rstart) break; 874 } 875 if (j < n) ncols = j-i; 876 else ncols = n-i; 877 ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 878 i = j; 879 } 880 } 881 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 882 883 baij->roworiented = r1; 884 a->roworiented = r2; 885 886 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */ 887 } 888 889 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 890 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 891 892 /* determine if any processor has disassembled, if so we must 893 also disassemble ourselfs, in order that we may reassemble. */ 894 /* 895 if nonzero structure of submatrix B cannot change then we know that 896 no processor disassembled thus we can skip this stuff 897 */ 898 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 899 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 900 if (mat->was_assembled && !other_disassembled) { 901 ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 902 } 903 } 904 905 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 906 ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr); 907 } 908 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 909 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 910 911 #if defined(PETSC_USE_INFO) 912 if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 913 ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr); 914 915 baij->ht_total_ct = 0; 916 baij->ht_insert_ct = 0; 917 } 918 #endif 919 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 920 ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr); 921 922 mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 923 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 924 } 925 926 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 927 928 baij->rowvalues = 0; 929 { 930 PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate; 931 ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 932 } 933 PetscFunctionReturn(0); 934 } 935 936 extern PetscErrorCode MatView_SeqBAIJ(Mat,PetscViewer); 937 #include <petscdraw.h> 938 #undef __FUNCT__ 939 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket" 940 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 941 { 942 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 943 PetscErrorCode ierr; 944 PetscMPIInt rank = baij->rank; 945 PetscInt bs = mat->rmap->bs; 946 PetscBool iascii,isdraw; 947 PetscViewer sviewer; 948 PetscViewerFormat format; 949 950 PetscFunctionBegin; 951 ierr = 
PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 952 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 953 if (iascii) { 954 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 955 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 956 MatInfo info; 957 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 958 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 959 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 960 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n", 961 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr); 962 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 963 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 964 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 965 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 966 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 967 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 968 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 969 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 970 PetscFunctionReturn(0); 971 } else if (format == PETSC_VIEWER_ASCII_INFO) { 972 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 973 PetscFunctionReturn(0); 974 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 975 PetscFunctionReturn(0); 976 } 977 } 978 979 if (isdraw) { 980 PetscDraw draw; 981 PetscBool isnull; 982 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 983 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 984 } 985 986 { 987 /* assemble the entire matrix onto first processor. */ 988 Mat A; 989 Mat_SeqBAIJ *Aloc; 990 PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 991 MatScalar *a; 992 993 /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */ 994 /* Perhaps this should be the type of mat? 
*/ 995 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 996 if (!rank) { 997 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 998 } else { 999 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1000 } 1001 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 1002 ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr); 1003 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1004 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 1005 1006 /* copy over the A part */ 1007 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1008 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1009 ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr); 1010 1011 for (i=0; i<mbs; i++) { 1012 rvals[0] = bs*(baij->rstartbs + i); 1013 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1014 for (j=ai[i]; j<ai[i+1]; j++) { 1015 col = (baij->cstartbs+aj[j])*bs; 1016 for (k=0; k<bs; k++) { 1017 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1018 col++; a += bs; 1019 } 1020 } 1021 } 1022 /* copy over the B part */ 1023 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1024 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1025 for (i=0; i<mbs; i++) { 1026 rvals[0] = bs*(baij->rstartbs + i); 1027 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1028 for (j=ai[i]; j<ai[i+1]; j++) { 1029 col = baij->garray[aj[j]]*bs; 1030 for (k=0; k<bs; k++) { 1031 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1032 col++; a += bs; 1033 } 1034 } 1035 } 1036 ierr = PetscFree(rvals);CHKERRQ(ierr); 1037 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1038 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1039 /* 1040 Everyone has to call to draw the matrix since the graphics waits are 1041 synchronized across all processors that share the PetscDraw object 1042 */ 1043 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1044 if (!rank) { 1045 ierr = MatView_SeqBAIJ(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1046 } 1047 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1048 ierr = MatDestroy(&A);CHKERRQ(ierr); 1049 } 1050 PetscFunctionReturn(0); 1051 } 1052 1053 #undef __FUNCT__ 1054 #define __FUNCT__ "MatView_MPIBAIJ_Binary" 1055 static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer) 1056 { 1057 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)mat->data; 1058 Mat_SeqBAIJ *A = (Mat_SeqBAIJ*)a->A->data; 1059 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)a->B->data; 1060 PetscErrorCode ierr; 1061 PetscInt i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen; 1062 PetscInt *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll; 1063 int fd; 1064 PetscScalar *column_values; 1065 FILE *file; 1066 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1067 PetscInt message_count,flowcontrolcount; 1068 1069 PetscFunctionBegin; 1070 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1071 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 1072 nz = bs2*(A->nz + B->nz); 1073 rlen = mat->rmap->n; 1074 if (!rank) { 1075 header[0] = MAT_FILE_CLASSID; 1076 header[1] = mat->rmap->N; 1077 header[2] = mat->cmap->N; 1078 1079 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1080 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1081 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1082 /* get largest number of rows any 
processor has */ 1083 range = mat->rmap->range; 1084 for (i=1; i<size; i++) { 1085 rlen = PetscMax(rlen,range[i+1] - range[i]); 1086 } 1087 } else { 1088 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1089 } 1090 1091 ierr = PetscMalloc1((rlen/bs),&crow_lens);CHKERRQ(ierr); 1092 /* compute lengths of each row */ 1093 for (i=0; i<a->mbs; i++) { 1094 crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1095 } 1096 /* store the row lengths to the file */ 1097 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1098 if (!rank) { 1099 MPI_Status status; 1100 ierr = PetscMalloc1(rlen,&row_lens);CHKERRQ(ierr); 1101 rlen = (range[1] - range[0])/bs; 1102 for (i=0; i<rlen; i++) { 1103 for (j=0; j<bs; j++) { 1104 row_lens[i*bs+j] = bs*crow_lens[i]; 1105 } 1106 } 1107 ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1108 for (i=1; i<size; i++) { 1109 rlen = (range[i+1] - range[i])/bs; 1110 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1111 ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1112 for (k=0; k<rlen; k++) { 1113 for (j=0; j<bs; j++) { 1114 row_lens[k*bs+j] = bs*crow_lens[k]; 1115 } 1116 } 1117 ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1118 } 1119 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1120 ierr = PetscFree(row_lens);CHKERRQ(ierr); 1121 } else { 1122 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1123 ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1124 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1125 } 1126 ierr = PetscFree(crow_lens);CHKERRQ(ierr); 1127 1128 /* load up the local column indices. Include for all rows not just one for each block row since process 0 does not have the 1129 information needed to make it for each row from a block row. 
This does require more communication but still not more than 1130 the communication needed for the nonzero values */ 1131 nzmax = nz; /* space a largest processor needs */ 1132 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1133 ierr = PetscMalloc1(nzmax,&column_indices);CHKERRQ(ierr); 1134 cnt = 0; 1135 for (i=0; i<a->mbs; i++) { 1136 pcnt = cnt; 1137 for (j=B->i[i]; j<B->i[i+1]; j++) { 1138 if ((col = garray[B->j[j]]) > cstart) break; 1139 for (l=0; l<bs; l++) { 1140 column_indices[cnt++] = bs*col+l; 1141 } 1142 } 1143 for (k=A->i[i]; k<A->i[i+1]; k++) { 1144 for (l=0; l<bs; l++) { 1145 column_indices[cnt++] = bs*(A->j[k] + cstart)+l; 1146 } 1147 } 1148 for (; j<B->i[i+1]; j++) { 1149 for (l=0; l<bs; l++) { 1150 column_indices[cnt++] = bs*garray[B->j[j]]+l; 1151 } 1152 } 1153 len = cnt - pcnt; 1154 for (k=1; k<bs; k++) { 1155 ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr); 1156 cnt += len; 1157 } 1158 } 1159 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1160 1161 /* store the columns to the file */ 1162 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1163 if (!rank) { 1164 MPI_Status status; 1165 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1166 for (i=1; i<size; i++) { 1167 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1168 ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1169 ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1170 ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1171 } 1172 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1173 } else { 1174 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1175 ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1176 ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1177 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1178 } 1179 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1180 1181 /* load up the numerical values */ 1182 ierr = PetscMalloc1(nzmax,&column_values);CHKERRQ(ierr); 1183 cnt = 0; 1184 for (i=0; i<a->mbs; i++) { 1185 rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]); 1186 for (j=B->i[i]; j<B->i[i+1]; j++) { 1187 if (garray[B->j[j]] > cstart) break; 1188 for (l=0; l<bs; l++) { 1189 for (ll=0; ll<bs; ll++) { 1190 column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll]; 1191 } 1192 } 1193 cnt += bs; 1194 } 1195 for (k=A->i[i]; k<A->i[i+1]; k++) { 1196 for (l=0; l<bs; l++) { 1197 for (ll=0; ll<bs; ll++) { 1198 column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll]; 1199 } 1200 } 1201 cnt += bs; 1202 } 1203 for (; j<B->i[i+1]; j++) { 1204 for (l=0; l<bs; l++) { 1205 for (ll=0; ll<bs; ll++) { 1206 column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll]; 1207 } 1208 } 1209 cnt += bs; 1210 } 1211 cnt += (bs-1)*rlen; 1212 } 1213 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1214 1215 /* store the column values to the file */ 1216 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1217 if (!rank) { 1218 MPI_Status status; 1219 ierr = 
PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1220 for (i=1; i<size; i++) { 1221 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1222 ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1223 ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1224 ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1225 } 1226 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1227 } else { 1228 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1229 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1230 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1231 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1232 } 1233 ierr = PetscFree(column_values);CHKERRQ(ierr); 1234 1235 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1236 if (file) { 1237 fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs); 1238 } 1239 PetscFunctionReturn(0); 1240 } 1241 1242 #undef __FUNCT__ 1243 #define __FUNCT__ "MatView_MPIBAIJ" 1244 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer) 1245 { 1246 PetscErrorCode ierr; 1247 PetscBool iascii,isdraw,issocket,isbinary; 1248 1249 PetscFunctionBegin; 1250 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1251 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1252 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1253 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1254 if (iascii || isdraw || issocket) { 1255 ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1256 } else if (isbinary) { 1257 ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1258 } 1259 PetscFunctionReturn(0); 1260 } 1261 1262 #undef __FUNCT__ 1263 #define __FUNCT__ "MatDestroy_MPIBAIJ" 1264 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat) 1265 { 1266 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1267 PetscErrorCode ierr; 1268 1269 PetscFunctionBegin; 1270 #if defined(PETSC_USE_LOG) 1271 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N); 1272 #endif 1273 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1274 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 1275 ierr = MatDestroy(&baij->A);CHKERRQ(ierr); 1276 ierr = MatDestroy(&baij->B);CHKERRQ(ierr); 1277 #if defined(PETSC_USE_CTABLE) 1278 ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr); 1279 #else 1280 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 1281 #endif 1282 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 1283 ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr); 1284 ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr); 1285 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 1286 ierr = PetscFree(baij->barray);CHKERRQ(ierr); 1287 ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr); 1288 ierr = PetscFree(baij->rangebs);CHKERRQ(ierr); 1289 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1290 1291 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1292 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1293 ierr = 
PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1294 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr); 1295 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1296 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1297 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1298 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C",NULL);CHKERRQ(ierr); 1299 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1300 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C",NULL);CHKERRQ(ierr); 1301 PetscFunctionReturn(0); 1302 } 1303 1304 #undef __FUNCT__ 1305 #define __FUNCT__ "MatMult_MPIBAIJ" 1306 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1307 { 1308 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1309 PetscErrorCode ierr; 1310 PetscInt nt; 1311 1312 PetscFunctionBegin; 1313 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1314 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1315 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1316 if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 1317 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1318 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1319 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1320 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1321 PetscFunctionReturn(0); 1322 } 1323 1324 #undef __FUNCT__ 1325 #define __FUNCT__ "MatMultAdd_MPIBAIJ" 1326 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1327 { 1328 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1329 PetscErrorCode ierr; 1330 1331 PetscFunctionBegin; 1332 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1333 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1334 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1335 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1336 PetscFunctionReturn(0); 1337 } 1338 1339 #undef __FUNCT__ 1340 #define __FUNCT__ "MatMultTranspose_MPIBAIJ" 1341 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy) 1342 { 1343 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1344 PetscErrorCode ierr; 1345 PetscBool merged; 1346 1347 PetscFunctionBegin; 1348 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1349 /* do nondiagonal part */ 1350 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1351 if (!merged) { 1352 /* send it on its way */ 1353 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1354 /* do local part */ 1355 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1356 /* receive remote parts: note this assumes the values are not actually */ 1357 /* inserted in yy until the next line */ 1358 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1359 } else { 1360 /* do local part */ 1361 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1362 /* send it on its way */ 1363 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1364 /* values actually were received in the Begin() 
but we need to call this nop */ 1365 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1366 } 1367 PetscFunctionReturn(0); 1368 } 1369 1370 #undef __FUNCT__ 1371 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ" 1372 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1373 { 1374 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1375 PetscErrorCode ierr; 1376 1377 PetscFunctionBegin; 1378 /* do nondiagonal part */ 1379 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1380 /* send it on its way */ 1381 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1382 /* do local part */ 1383 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1384 /* receive remote parts: note this assumes the values are not actually */ 1385 /* inserted in yy until the next line, which is true for my implementation*/ 1386 /* but is not perhaps always true. */ 1387 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1388 PetscFunctionReturn(0); 1389 } 1390 1391 /* 1392 This only works correctly for square matrices where the subblock A->A is the 1393 diagonal block 1394 */ 1395 #undef __FUNCT__ 1396 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ" 1397 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1398 { 1399 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1400 PetscErrorCode ierr; 1401 1402 PetscFunctionBegin; 1403 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1404 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1405 PetscFunctionReturn(0); 1406 } 1407 1408 #undef __FUNCT__ 1409 #define __FUNCT__ "MatScale_MPIBAIJ" 1410 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa) 1411 { 1412 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1413 PetscErrorCode ierr; 1414 1415 PetscFunctionBegin; 1416 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1417 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1418 PetscFunctionReturn(0); 1419 } 1420 1421 #undef __FUNCT__ 1422 #define __FUNCT__ "MatGetRow_MPIBAIJ" 1423 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1424 { 1425 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1426 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1427 PetscErrorCode ierr; 1428 PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1429 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend; 1430 PetscInt *cmap,*idx_p,cstart = mat->cstartbs; 1431 1432 PetscFunctionBegin; 1433 if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows"); 1434 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1435 mat->getrowactive = PETSC_TRUE; 1436 1437 if (!mat->rowvalues && (idx || v)) { 1438 /* 1439 allocate enough space to hold information from the longest row. 
1440 */ 1441 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data; 1442 PetscInt max = 1,mbs = mat->mbs,tmp; 1443 for (i=0; i<mbs; i++) { 1444 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1445 if (max < tmp) max = tmp; 1446 } 1447 ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr); 1448 } 1449 lrow = row - brstart; 1450 1451 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1452 if (!v) {pvA = 0; pvB = 0;} 1453 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1454 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1455 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1456 nztot = nzA + nzB; 1457 1458 cmap = mat->garray; 1459 if (v || idx) { 1460 if (nztot) { 1461 /* Sort by increasing column numbers, assuming A and B already sorted */ 1462 PetscInt imark = -1; 1463 if (v) { 1464 *v = v_p = mat->rowvalues; 1465 for (i=0; i<nzB; i++) { 1466 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1467 else break; 1468 } 1469 imark = i; 1470 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1471 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1472 } 1473 if (idx) { 1474 *idx = idx_p = mat->rowindices; 1475 if (imark > -1) { 1476 for (i=0; i<imark; i++) { 1477 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1478 } 1479 } else { 1480 for (i=0; i<nzB; i++) { 1481 if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1482 else break; 1483 } 1484 imark = i; 1485 } 1486 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1487 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1488 } 1489 } else { 1490 if (idx) *idx = 0; 1491 if (v) *v = 0; 1492 } 1493 } 1494 *nz = nztot; 1495 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1496 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1497 PetscFunctionReturn(0); 1498 } 1499 1500 #undef __FUNCT__ 1501 #define __FUNCT__ "MatRestoreRow_MPIBAIJ" 1502 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1503 { 1504 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1505 1506 PetscFunctionBegin; 1507 if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1508 baij->getrowactive = PETSC_FALSE; 1509 PetscFunctionReturn(0); 1510 } 1511 1512 #undef __FUNCT__ 1513 #define __FUNCT__ "MatZeroEntries_MPIBAIJ" 1514 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A) 1515 { 1516 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1517 PetscErrorCode ierr; 1518 1519 PetscFunctionBegin; 1520 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1521 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1522 PetscFunctionReturn(0); 1523 } 1524 1525 #undef __FUNCT__ 1526 #define __FUNCT__ "MatGetInfo_MPIBAIJ" 1527 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1528 { 1529 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data; 1530 Mat A = a->A,B = a->B; 1531 PetscErrorCode ierr; 1532 PetscReal isend[5],irecv[5]; 1533 1534 PetscFunctionBegin; 1535 info->block_size = (PetscReal)matin->rmap->bs; 1536 1537 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1538 1539 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1540 isend[3] = info->memory; isend[4] = info->mallocs; 1541 1542 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1543 1544 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1545 isend[3] += info->memory; 
isend[4] += info->mallocs; 1546 1547 if (flag == MAT_LOCAL) { 1548 info->nz_used = isend[0]; 1549 info->nz_allocated = isend[1]; 1550 info->nz_unneeded = isend[2]; 1551 info->memory = isend[3]; 1552 info->mallocs = isend[4]; 1553 } else if (flag == MAT_GLOBAL_MAX) { 1554 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1555 1556 info->nz_used = irecv[0]; 1557 info->nz_allocated = irecv[1]; 1558 info->nz_unneeded = irecv[2]; 1559 info->memory = irecv[3]; 1560 info->mallocs = irecv[4]; 1561 } else if (flag == MAT_GLOBAL_SUM) { 1562 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1563 1564 info->nz_used = irecv[0]; 1565 info->nz_allocated = irecv[1]; 1566 info->nz_unneeded = irecv[2]; 1567 info->memory = irecv[3]; 1568 info->mallocs = irecv[4]; 1569 } else SETERRQ1(PetscObjectComm((PetscObject)matin),PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1570 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1571 info->fill_ratio_needed = 0; 1572 info->factor_mallocs = 0; 1573 PetscFunctionReturn(0); 1574 } 1575 1576 #undef __FUNCT__ 1577 #define __FUNCT__ "MatSetOption_MPIBAIJ" 1578 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg) 1579 { 1580 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1581 PetscErrorCode ierr; 1582 1583 PetscFunctionBegin; 1584 switch (op) { 1585 case MAT_NEW_NONZERO_LOCATIONS: 1586 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1587 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1588 case MAT_KEEP_NONZERO_PATTERN: 1589 case MAT_NEW_NONZERO_LOCATION_ERR: 1590 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1591 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1592 break; 1593 case MAT_ROW_ORIENTED: 1594 a->roworiented = flg; 1595 1596 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1597 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1598 break; 1599 case MAT_NEW_DIAGONALS: 1600 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1601 break; 1602 case MAT_IGNORE_OFF_PROC_ENTRIES: 1603 a->donotstash = flg; 1604 break; 1605 case MAT_USE_HASH_TABLE: 1606 a->ht_flag = flg; 1607 break; 1608 case MAT_SYMMETRIC: 1609 case MAT_STRUCTURALLY_SYMMETRIC: 1610 case MAT_HERMITIAN: 1611 case MAT_SYMMETRY_ETERNAL: 1612 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1613 break; 1614 default: 1615 SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"unknown option %d",op); 1616 } 1617 PetscFunctionReturn(0); 1618 } 1619 1620 #undef __FUNCT__ 1621 #define __FUNCT__ "MatTranspose_MPIBAIJ" 1622 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout) 1623 { 1624 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 1625 Mat_SeqBAIJ *Aloc; 1626 Mat B; 1627 PetscErrorCode ierr; 1628 PetscInt M =A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col; 1629 PetscInt bs=A->rmap->bs,mbs=baij->mbs; 1630 MatScalar *a; 1631 1632 PetscFunctionBegin; 1633 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1634 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1635 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 1636 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1637 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1638 /* Do not know preallocation information, but must set block size */ 1639 ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,NULL,PETSC_DECIDE,NULL);CHKERRQ(ierr); 1640 } else { 
1641 B = *matout; 1642 } 1643 1644 /* copy over the A part */ 1645 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1646 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1647 ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr); 1648 1649 for (i=0; i<mbs; i++) { 1650 rvals[0] = bs*(baij->rstartbs + i); 1651 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1652 for (j=ai[i]; j<ai[i+1]; j++) { 1653 col = (baij->cstartbs+aj[j])*bs; 1654 for (k=0; k<bs; k++) { 1655 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1656 1657 col++; a += bs; 1658 } 1659 } 1660 } 1661 /* copy over the B part */ 1662 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1663 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1664 for (i=0; i<mbs; i++) { 1665 rvals[0] = bs*(baij->rstartbs + i); 1666 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1667 for (j=ai[i]; j<ai[i+1]; j++) { 1668 col = baij->garray[aj[j]]*bs; 1669 for (k=0; k<bs; k++) { 1670 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1671 col++; 1672 a += bs; 1673 } 1674 } 1675 } 1676 ierr = PetscFree(rvals);CHKERRQ(ierr); 1677 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1678 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1679 1680 if (reuse == MAT_INITIAL_MATRIX || *matout != A) *matout = B; 1681 else { 1682 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 1683 } 1684 PetscFunctionReturn(0); 1685 } 1686 1687 #undef __FUNCT__ 1688 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ" 1689 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 1690 { 1691 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1692 Mat a = baij->A,b = baij->B; 1693 PetscErrorCode ierr; 1694 PetscInt s1,s2,s3; 1695 1696 PetscFunctionBegin; 1697 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1698 if (rr) { 1699 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1700 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1701 /* Overlap communication with computation. 
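The scatter of rr into lvec is started here; the diagonal block is scaled while the scatter is in flight, and the off-diagonal block is right-scaled only after VecScatterEnd() below.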
*/ 1702 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1703 } 1704 if (ll) { 1705 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1706 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1707 ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr); 1708 } 1709 /* scale the diagonal block */ 1710 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1711 1712 if (rr) { 1713 /* Do a scatter end and then right scale the off-diagonal block */ 1714 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1715 ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr); 1716 } 1717 PetscFunctionReturn(0); 1718 } 1719 1720 #undef __FUNCT__ 1721 #define __FUNCT__ "MatZeroRows_MPIBAIJ" 1722 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 1723 { 1724 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 1725 PetscInt *owners = A->rmap->range; 1726 PetscInt n = A->rmap->n; 1727 PetscMPIInt size = l->size; 1728 PetscSF sf; 1729 PetscInt *lrows; 1730 PetscSFNode *rrows; 1731 PetscInt lastidx = -1, r, p = 0, len = 0; 1732 PetscErrorCode ierr; 1733 1734 PetscFunctionBegin; 1735 /* Create SF where leaves are input rows and roots are owned rows */ 1736 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 1737 for (r = 0; r < n; ++r) lrows[r] = -1; 1738 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 1739 for (r = 0; r < N; ++r) { 1740 const PetscInt idx = rows[r]; 1741 PetscBool found = PETSC_FALSE; 1742 /* Trick for efficient searching for sorted rows */ 1743 if (lastidx > idx) p = 0; 1744 lastidx = idx; 1745 for (; p < size; ++p) { 1746 if (idx >= owners[p] && idx < owners[p+1]) { 1747 rrows[r].rank = p; 1748 rrows[r].index = rows[r] - owners[p]; 1749 found = PETSC_TRUE; 1750 break; 1751 } 1752 } 1753 if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %d not found in matrix distribution", idx); 1754 } 1755 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 1756 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 1757 /* Collect flags for rows to be zeroed */ 1758 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1759 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1760 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1761 /* Compress and put in row numbers */ 1762 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 1763 /* fix right hand side if needed */ 1764 if (x && b) { 1765 const PetscScalar *xx; 1766 PetscScalar *bb; 1767 1768 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 1769 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1770 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 1771 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 1772 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1773 } 1774 1775 /* actually zap the local rows */ 1776 /* 1777 Zero the required rows. If the "diagonal block" of the matrix 1778 is square and the user wishes to set the diagonal we use separate 1779 code so that MatSetValues() is not called for each diagonal allocating 1780 new memory, thus calling lots of mallocs and slowing things down. 
1781 1782 */ 1783 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1784 ierr = MatZeroRows_SeqBAIJ(l->B,len,lrows,0.0,0,0);CHKERRQ(ierr); 1785 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 1786 ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,diag,0,0);CHKERRQ(ierr); 1787 } else if (diag != 0.0) { 1788 ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,0,0);CHKERRQ(ierr); 1789 if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1790 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1791 for (r = 0; r < len; ++r) { 1792 const PetscInt row = lrows[r] + A->rmap->rstart; 1793 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 1794 } 1795 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1796 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1797 } else { 1798 ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,0,0);CHKERRQ(ierr); 1799 } 1800 ierr = PetscFree(lrows);CHKERRQ(ierr); 1801 { 1802 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1803 ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1804 } 1805 PetscFunctionReturn(0); 1806 } 1807 1808 #undef __FUNCT__ 1809 #define __FUNCT__ "MatZeroRowsColumns_MPIBAIJ" 1810 PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 1811 { 1812 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1813 PetscErrorCode ierr; 1814 PetscMPIInt size = l->size,n = A->rmap->n,lastidx = -1; 1815 PetscInt i,j,k,r,p = 0,len = 0,row,col,count; 1816 PetscInt *lrows,*owners = A->rmap->range; 1817 PetscSFNode *rrows; 1818 PetscSF sf; 1819 const PetscScalar *xx; 1820 PetscScalar *bb,*mask; 1821 Vec xmask,lmask; 1822 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)l->B->data; 1823 PetscInt bs = A->rmap->bs, bs2 = baij->bs2; 1824 PetscScalar *aa; 1825 #if defined(PETSC_DEBUG) 1826 PetscBool found = PETSC_FALSE; 1827 #endif 1828 1829 PetscFunctionBegin; 1830 /* Create SF where leaves are input rows and roots are owned rows */ 1831 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 1832 for (r = 0; r < n; ++r) lrows[r] = -1; 1833 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 1834 for (r = 0; r < N; ++r) { 1835 const PetscInt idx = rows[r]; 1836 PetscBool found = PETSC_FALSE; 1837 /* Trick for efficient searching for sorted rows */ 1838 if (lastidx > idx) p = 0; 1839 lastidx = idx; 1840 for (; p < size; ++p) { 1841 if (idx >= owners[p] && idx < owners[p+1]) { 1842 rrows[r].rank = p; 1843 rrows[r].index = rows[r] - owners[p]; 1844 found = PETSC_TRUE; 1845 break; 1846 } 1847 } 1848 if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %d not found in matrix distribution", idx); 1849 } 1850 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 1851 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 1852 /* Collect flags for rows to be zeroed */ 1853 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1854 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1855 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1856 /* Compress and put in row numbers */ 1857 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 1858 /* zero diagonal part of matrix */ 1859 ierr = 
MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 1860 /* handle off diagonal part of matrix */ 1861 ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr); 1862 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 1863 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 1864 for (i=0; i<len; i++) bb[lrows[i]] = 1; 1865 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 1866 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1867 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1868 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 1869 if (x) { 1870 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1871 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1872 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1873 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1874 } 1875 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 1876 /* remove zeroed rows of off diagonal matrix */ 1877 for (i = 0; i < len; ++i) { 1878 row = lrows[i]; 1879 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 1880 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 1881 for (k = 0; k < count; ++k) { 1882 aa[0] = 0.0; 1883 aa += bs; 1884 } 1885 } 1886 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1887 for (i = 0; i < l->B->rmap->N; ++i) { 1888 row = i/bs; 1889 for (j = baij->i[row]; j < baij->i[row+1]; ++j) { 1890 for (k = 0; k < bs; ++k) { 1891 col = bs*baij->j[j] + k; 1892 if (PetscAbsScalar(mask[col])) { 1893 aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 1894 if (b) bb[i] -= aa[0]*xx[col]; 1895 aa[0] = 0.0; 1896 } 1897 } 1898 } 1899 } 1900 if (x) { 1901 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1902 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1903 } 1904 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1905 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 1906 ierr = PetscFree(lrows);CHKERRQ(ierr); 1907 PetscFunctionReturn(0); 1908 } 1909 1910 #undef __FUNCT__ 1911 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ" 1912 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1913 { 1914 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1915 PetscErrorCode ierr; 1916 1917 PetscFunctionBegin; 1918 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1919 PetscFunctionReturn(0); 1920 } 1921 1922 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat*); 1923 1924 #undef __FUNCT__ 1925 #define __FUNCT__ "MatEqual_MPIBAIJ" 1926 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool *flag) 1927 { 1928 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 1929 Mat a,b,c,d; 1930 PetscBool flg; 1931 PetscErrorCode ierr; 1932 1933 PetscFunctionBegin; 1934 a = matA->A; b = matA->B; 1935 c = matB->A; d = matB->B; 1936 1937 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1938 if (flg) { 1939 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1940 } 1941 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1942 PetscFunctionReturn(0); 1943 } 1944 1945 #undef __FUNCT__ 1946 #define __FUNCT__ "MatCopy_MPIBAIJ" 1947 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str) 1948 { 1949 PetscErrorCode ierr; 1950 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1951 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 1952 1953 PetscFunctionBegin; 1954 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
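In that case we fall back to MatCopy_Basic(), which copies entry by entry; otherwise the diagonal and off-diagonal blocks are copied directly.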
*/ 1955 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1956 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1957 } else { 1958 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1959 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1960 } 1961 PetscFunctionReturn(0); 1962 } 1963 1964 #undef __FUNCT__ 1965 #define __FUNCT__ "MatSetUp_MPIBAIJ" 1966 PetscErrorCode MatSetUp_MPIBAIJ(Mat A) 1967 { 1968 PetscErrorCode ierr; 1969 1970 PetscFunctionBegin; 1971 ierr = MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1972 PetscFunctionReturn(0); 1973 } 1974 1975 #undef __FUNCT__ 1976 #define __FUNCT__ "MatAXPY_MPIBAIJ" 1977 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1978 { 1979 PetscErrorCode ierr; 1980 Mat_MPIBAIJ *xx=(Mat_MPIBAIJ*)X->data,*yy=(Mat_MPIBAIJ*)Y->data; 1981 PetscBLASInt bnz,one=1; 1982 Mat_SeqBAIJ *x,*y; 1983 1984 PetscFunctionBegin; 1985 if (str == SAME_NONZERO_PATTERN) { 1986 PetscScalar alpha = a; 1987 x = (Mat_SeqBAIJ*)xx->A->data; 1988 y = (Mat_SeqBAIJ*)yy->A->data; 1989 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 1990 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 1991 x = (Mat_SeqBAIJ*)xx->B->data; 1992 y = (Mat_SeqBAIJ*)yy->B->data; 1993 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 1994 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 1995 } else { 1996 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1997 } 1998 PetscFunctionReturn(0); 1999 } 2000 2001 #undef __FUNCT__ 2002 #define __FUNCT__ "MatRealPart_MPIBAIJ" 2003 PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 2004 { 2005 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2006 PetscErrorCode ierr; 2007 2008 PetscFunctionBegin; 2009 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2010 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2011 PetscFunctionReturn(0); 2012 } 2013 2014 #undef __FUNCT__ 2015 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ" 2016 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 2017 { 2018 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2019 PetscErrorCode ierr; 2020 2021 PetscFunctionBegin; 2022 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2023 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2024 PetscFunctionReturn(0); 2025 } 2026 2027 #undef __FUNCT__ 2028 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ" 2029 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 2030 { 2031 PetscErrorCode ierr; 2032 IS iscol_local; 2033 PetscInt csize; 2034 2035 PetscFunctionBegin; 2036 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 2037 if (call == MAT_REUSE_MATRIX) { 2038 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 2039 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2040 } else { 2041 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 2042 } 2043 ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 2044 if (call == MAT_INITIAL_MATRIX) { 2045 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 2046 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 2047 } 2048 PetscFunctionReturn(0); 2049 } 2050 extern PetscErrorCode MatGetSubMatrices_MPIBAIJ_local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,PetscBool*,Mat*); 2051 #undef __FUNCT__ 2052 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private" 2053 /* 2054 Not great since it makes two copies of the 
submatrix: first a SeqBAIJ matrix 2055 locally, and then the end result by concatenating the local matrices. 2056 Writing it directly would be much like MatGetSubMatrices_MPIBAIJ() 2057 */ 2058 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 2059 { 2060 PetscErrorCode ierr; 2061 PetscMPIInt rank,size; 2062 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs; 2063 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol,nrow; 2064 Mat M,Mreuse; 2065 MatScalar *vwork,*aa; 2066 MPI_Comm comm; 2067 IS isrow_new, iscol_new; 2068 PetscBool idflag,allrows, allcols; 2069 Mat_SeqBAIJ *aij; 2070 2071 PetscFunctionBegin; 2072 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2073 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2074 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2075 /* The compression and expansion should be avoided. Doesn't report 2076 errors, might change the indices, hence buggy */ 2077 ierr = ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new);CHKERRQ(ierr); 2078 ierr = ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new);CHKERRQ(ierr); 2079 2080 /* Check for special case: each processor gets entire matrix columns */ 2081 ierr = ISIdentity(iscol,&idflag);CHKERRQ(ierr); 2082 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 2083 if (idflag && ncol == mat->cmap->N) allcols = PETSC_TRUE; 2084 else allcols = PETSC_FALSE; 2085 2086 ierr = ISIdentity(isrow,&idflag);CHKERRQ(ierr); 2087 ierr = ISGetLocalSize(isrow,&nrow);CHKERRQ(ierr); 2088 if (idflag && nrow == mat->rmap->N) allrows = PETSC_TRUE; 2089 else allrows = PETSC_FALSE; 2090 2091 if (call == MAT_REUSE_MATRIX) { 2092 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 2093 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2094 ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr); 2095 } else { 2096 ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr); 2097 } 2098 ierr = ISDestroy(&isrow_new);CHKERRQ(ierr); 2099 ierr = ISDestroy(&iscol_new);CHKERRQ(ierr); 2100 /* 2101 m - number of local rows 2102 n - number of columns (same on all processors) 2103 rstart - first row in new global matrix generated 2104 */ 2105 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2106 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2107 m = m/bs; 2108 n = n/bs; 2109 2110 if (call == MAT_INITIAL_MATRIX) { 2111 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 2112 ii = aij->i; 2113 jj = aij->j; 2114 2115 /* 2116 Determine the number of non-zeros in the diagonal and off-diagonal 2117 portions of the matrix in order to do correct preallocation 2118 */ 2119 2120 /* first get start and end of "diagonal" columns */ 2121 if (csize == PETSC_DECIDE) { 2122 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2123 if (mglobal == n*bs) { /* square matrix */ 2124 nlocal = m; 2125 } else { 2126 nlocal = n/size + ((n % size) > rank); 2127 } 2128 } else { 2129 nlocal = csize/bs; 2130 } 2131 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2132 rstart = rend - nlocal; 2133 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 2134 2135 /* next, compute all the
lengths */ 2136 ierr = PetscMalloc2(m+1,&dlens,m+1,&olens);CHKERRQ(ierr); 2137 for (i=0; i<m; i++) { 2138 jend = ii[i+1] - ii[i]; 2139 olen = 0; 2140 dlen = 0; 2141 for (j=0; j<jend; j++) { 2142 if (*jj < rstart || *jj >= rend) olen++; 2143 else dlen++; 2144 jj++; 2145 } 2146 olens[i] = olen; 2147 dlens[i] = dlen; 2148 } 2149 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 2150 ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr); 2151 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 2152 ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr); 2153 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 2154 } else { 2155 PetscInt ml,nl; 2156 2157 M = *newmat; 2158 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2159 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2160 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2161 /* 2162 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2163 rather than the slower MatSetValues(). 2164 */ 2165 M->was_assembled = PETSC_TRUE; 2166 M->assembled = PETSC_FALSE; 2167 } 2168 ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2169 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2170 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 2171 ii = aij->i; 2172 jj = aij->j; 2173 aa = aij->a; 2174 for (i=0; i<m; i++) { 2175 row = rstart/bs + i; 2176 nz = ii[i+1] - ii[i]; 2177 cwork = jj; jj += nz; 2178 vwork = aa; aa += nz*bs*bs; 2179 ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2180 } 2181 2182 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2183 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2184 *newmat = M; 2185 2186 /* save submatrix used in processor for next request */ 2187 if (call == MAT_INITIAL_MATRIX) { 2188 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2189 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2190 } 2191 PetscFunctionReturn(0); 2192 } 2193 2194 #undef __FUNCT__ 2195 #define __FUNCT__ "MatPermute_MPIBAIJ" 2196 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B) 2197 { 2198 MPI_Comm comm,pcomm; 2199 PetscInt clocal_size,nrows; 2200 const PetscInt *rows; 2201 PetscMPIInt size; 2202 IS crowp,lcolp; 2203 PetscErrorCode ierr; 2204 2205 PetscFunctionBegin; 2206 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2207 /* make a collective version of 'rowp' */ 2208 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 2209 if (pcomm==comm) { 2210 crowp = rowp; 2211 } else { 2212 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 2213 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 2214 ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr); 2215 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 2216 } 2217 ierr = ISSetPermutation(crowp);CHKERRQ(ierr); 2218 /* make a local version of 'colp' */ 2219 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 2220 ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr); 2221 if (size==1) { 2222 lcolp = colp; 2223 } else { 2224 ierr = ISAllGather(colp,&lcolp);CHKERRQ(ierr); 2225 } 2226 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 2227 /* now we just get the submatrix */ 2228 ierr = MatGetLocalSize(A,NULL,&clocal_size);CHKERRQ(ierr); 2229 ierr = MatGetSubMatrix_MPIBAIJ_Private(A,crowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 2230 /* clean up */ 2231 if (pcomm!=comm) { 2232 ierr = 
ISDestroy(&crowp);CHKERRQ(ierr); 2233 } 2234 if (size>1) { 2235 ierr = ISDestroy(&lcolp);CHKERRQ(ierr); 2236 } 2237 PetscFunctionReturn(0); 2238 } 2239 2240 #undef __FUNCT__ 2241 #define __FUNCT__ "MatGetGhosts_MPIBAIJ" 2242 PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 2243 { 2244 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data; 2245 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 2246 2247 PetscFunctionBegin; 2248 if (nghosts) *nghosts = B->nbs; 2249 if (ghosts) *ghosts = baij->garray; 2250 PetscFunctionReturn(0); 2251 } 2252 2253 #undef __FUNCT__ 2254 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIBAIJ" 2255 PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat) 2256 { 2257 Mat B; 2258 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2259 Mat_SeqBAIJ *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data; 2260 Mat_SeqAIJ *b; 2261 PetscErrorCode ierr; 2262 PetscMPIInt size,rank,*recvcounts = 0,*displs = 0; 2263 PetscInt sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs; 2264 PetscInt m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf; 2265 2266 PetscFunctionBegin; 2267 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 2268 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 2269 2270 /* ---------------------------------------------------------------- 2271 Tell every processor the number of nonzeros per row 2272 */ 2273 ierr = PetscMalloc1((A->rmap->N/bs),&lens);CHKERRQ(ierr); 2274 for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) { 2275 lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs]; 2276 } 2277 sendcount = A->rmap->rend/bs - A->rmap->rstart/bs; 2278 ierr = PetscMalloc1(2*size,&recvcounts);CHKERRQ(ierr); 2279 displs = recvcounts + size; 2280 for (i=0; i<size; i++) { 2281 recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs; 2282 displs[i] = A->rmap->range[i]/bs; 2283 } 2284 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2285 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2286 #else 2287 ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2288 #endif 2289 /* --------------------------------------------------------------- 2290 Create the sequential matrix of the same type as the local block diagonal 2291 */ 2292 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 2293 ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2294 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2295 ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr); 2296 b = (Mat_SeqAIJ*)B->data; 2297 2298 /*-------------------------------------------------------------------- 2299 Copy my part of matrix column indices over 2300 */ 2301 sendcount = ad->nz + bd->nz; 2302 jsendbuf = b->j + b->i[rstarts[rank]/bs]; 2303 a_jsendbuf = ad->j; 2304 b_jsendbuf = bd->j; 2305 n = A->rmap->rend/bs - A->rmap->rstart/bs; 2306 cnt = 0; 2307 for (i=0; i<n; i++) { 2308 2309 /* put in lower diagonal portion */ 2310 m = bd->i[i+1] - bd->i[i]; 2311 while (m > 0) { 2312 /* is it above diagonal (in bd (compressed) numbering) */ 2313 if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break; 2314 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2315 m--; 2316 } 2317 2318 /* put in diagonal portion */ 2319 for (j=ad->i[i]; 
j<ad->i[i+1]; j++) { 2320 jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++; 2321 } 2322 2323 /* put in upper diagonal portion */ 2324 while (m-- > 0) { 2325 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2326 } 2327 } 2328 if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt); 2329 2330 /*-------------------------------------------------------------------- 2331 Gather all column indices to all processors 2332 */ 2333 for (i=0; i<size; i++) { 2334 recvcounts[i] = 0; 2335 for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) { 2336 recvcounts[i] += lens[j]; 2337 } 2338 } 2339 displs[0] = 0; 2340 for (i=1; i<size; i++) { 2341 displs[i] = displs[i-1] + recvcounts[i-1]; 2342 } 2343 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2344 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2345 #else 2346 ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2347 #endif 2348 /*-------------------------------------------------------------------- 2349 Assemble the matrix into useable form (note numerical values not yet set) 2350 */ 2351 /* set the b->ilen (length of each row) values */ 2352 ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr); 2353 /* set the b->i indices */ 2354 b->i[0] = 0; 2355 for (i=1; i<=A->rmap->N/bs; i++) { 2356 b->i[i] = b->i[i-1] + lens[i-1]; 2357 } 2358 ierr = PetscFree(lens);CHKERRQ(ierr); 2359 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2360 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2361 ierr = PetscFree(recvcounts);CHKERRQ(ierr); 2362 2363 if (A->symmetric) { 2364 ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2365 } else if (A->hermitian) { 2366 ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr); 2367 } else if (A->structurally_symmetric) { 2368 ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2369 } 2370 *newmat = B; 2371 PetscFunctionReturn(0); 2372 } 2373 2374 #undef __FUNCT__ 2375 #define __FUNCT__ "MatSOR_MPIBAIJ" 2376 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2377 { 2378 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 2379 PetscErrorCode ierr; 2380 Vec bb1 = 0; 2381 2382 PetscFunctionBegin; 2383 if (flag == SOR_APPLY_UPPER) { 2384 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2385 PetscFunctionReturn(0); 2386 } 2387 2388 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) { 2389 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2390 } 2391 2392 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2393 if (flag & SOR_ZERO_INITIAL_GUESS) { 2394 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2395 its--; 2396 } 2397 2398 while (its--) { 2399 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2400 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2401 2402 /* update rhs: bb1 = bb - B*x */ 2403 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2404 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2405 2406 /* local sweep */ 2407 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2408 } 2409 } else if (flag & 
SOR_LOCAL_FORWARD_SWEEP) { 2410 if (flag & SOR_ZERO_INITIAL_GUESS) { 2411 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2412 its--; 2413 } 2414 while (its--) { 2415 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2416 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2417 2418 /* update rhs: bb1 = bb - B*x */ 2419 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2420 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2421 2422 /* local sweep */ 2423 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2424 } 2425 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 2426 if (flag & SOR_ZERO_INITIAL_GUESS) { 2427 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2428 its--; 2429 } 2430 while (its--) { 2431 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2432 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2433 2434 /* update rhs: bb1 = bb - B*x */ 2435 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2436 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2437 2438 /* local sweep */ 2439 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2440 } 2441 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel version of SOR requested not supported"); 2442 2443 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 2444 PetscFunctionReturn(0); 2445 } 2446 2447 #undef __FUNCT__ 2448 #define __FUNCT__ "MatGetColumnNorms_MPIBAIJ" 2449 PetscErrorCode MatGetColumnNorms_MPIBAIJ(Mat A,NormType type,PetscReal *norms) 2450 { 2451 PetscErrorCode ierr; 2452 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)A->data; 2453 PetscInt N,i,*garray = aij->garray; 2454 PetscInt ib,jb,bs = A->rmap->bs; 2455 Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ*) aij->A->data; 2456 MatScalar *a_val = a_aij->a; 2457 Mat_SeqBAIJ *b_aij = (Mat_SeqBAIJ*) aij->B->data; 2458 MatScalar *b_val = b_aij->a; 2459 PetscReal *work; 2460 2461 PetscFunctionBegin; 2462 ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr); 2463 ierr = PetscCalloc1(N,&work);CHKERRQ(ierr); 2464 if (type == NORM_2) { 2465 for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2466 for (jb=0; jb<bs; jb++) { 2467 for (ib=0; ib<bs; ib++) { 2468 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val); 2469 a_val++; 2470 } 2471 } 2472 } 2473 for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2474 for (jb=0; jb<bs; jb++) { 2475 for (ib=0; ib<bs; ib++) { 2476 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val); 2477 b_val++; 2478 } 2479 } 2480 } 2481 } else if (type == NORM_1) { 2482 for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2483 for (jb=0; jb<bs; jb++) { 2484 for (ib=0; ib<bs; ib++) { 2485 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val); 2486 a_val++; 2487 } 2488 } 2489 } 2490 for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2491 for (jb=0; jb<bs; jb++) { 2492 for (ib=0; ib<bs; ib++) { 2493 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val); 2494 b_val++; 2495 } 2496 } 2497 } 2498 } else if (type == NORM_INFINITY) { 2499 for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2500 for (jb=0; jb<bs; jb++) { 2501 for (ib=0; ib<bs; ib++) { 2502 int col = A->cmap->rstart + a_aij->j[i] * bs + jb; 2503 work[col] = 
PetscMax(PetscAbsScalar(*a_val), work[col]); 2504 a_val++; 2505 } 2506 } 2507 } 2508 for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2509 for (jb=0; jb<bs; jb++) { 2510 for (ib=0; ib<bs; ib++) { 2511 int col = garray[b_aij->j[i]] * bs + jb; 2512 work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]); 2513 b_val++; 2514 } 2515 } 2516 } 2517 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 2518 if (type == NORM_INFINITY) { 2519 ierr = MPI_Allreduce(work,norms,N,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2520 } else { 2521 ierr = MPI_Allreduce(work,norms,N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2522 } 2523 ierr = PetscFree(work);CHKERRQ(ierr); 2524 if (type == NORM_2) { 2525 for (i=0; i<N; i++) norms[i] = PetscSqrtReal(norms[i]); 2526 } 2527 PetscFunctionReturn(0); 2528 } 2529 2530 #undef __FUNCT__ 2531 #define __FUNCT__ "MatInvertBlockDiagonal_MPIBAIJ" 2532 PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values) 2533 { 2534 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 2535 PetscErrorCode ierr; 2536 2537 PetscFunctionBegin; 2538 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2539 PetscFunctionReturn(0); 2540 } 2541 2542 2543 /* -------------------------------------------------------------------*/ 2544 static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ, 2545 MatGetRow_MPIBAIJ, 2546 MatRestoreRow_MPIBAIJ, 2547 MatMult_MPIBAIJ, 2548 /* 4*/ MatMultAdd_MPIBAIJ, 2549 MatMultTranspose_MPIBAIJ, 2550 MatMultTransposeAdd_MPIBAIJ, 2551 0, 2552 0, 2553 0, 2554 /*10*/ 0, 2555 0, 2556 0, 2557 MatSOR_MPIBAIJ, 2558 MatTranspose_MPIBAIJ, 2559 /*15*/ MatGetInfo_MPIBAIJ, 2560 MatEqual_MPIBAIJ, 2561 MatGetDiagonal_MPIBAIJ, 2562 MatDiagonalScale_MPIBAIJ, 2563 MatNorm_MPIBAIJ, 2564 /*20*/ MatAssemblyBegin_MPIBAIJ, 2565 MatAssemblyEnd_MPIBAIJ, 2566 MatSetOption_MPIBAIJ, 2567 MatZeroEntries_MPIBAIJ, 2568 /*24*/ MatZeroRows_MPIBAIJ, 2569 0, 2570 0, 2571 0, 2572 0, 2573 /*29*/ MatSetUp_MPIBAIJ, 2574 0, 2575 0, 2576 0, 2577 0, 2578 /*34*/ MatDuplicate_MPIBAIJ, 2579 0, 2580 0, 2581 0, 2582 0, 2583 /*39*/ MatAXPY_MPIBAIJ, 2584 MatGetSubMatrices_MPIBAIJ, 2585 MatIncreaseOverlap_MPIBAIJ, 2586 MatGetValues_MPIBAIJ, 2587 MatCopy_MPIBAIJ, 2588 /*44*/ 0, 2589 MatScale_MPIBAIJ, 2590 0, 2591 0, 2592 MatZeroRowsColumns_MPIBAIJ, 2593 /*49*/ 0, 2594 0, 2595 0, 2596 0, 2597 0, 2598 /*54*/ MatFDColoringCreate_MPIXAIJ, 2599 0, 2600 MatSetUnfactored_MPIBAIJ, 2601 MatPermute_MPIBAIJ, 2602 MatSetValuesBlocked_MPIBAIJ, 2603 /*59*/ MatGetSubMatrix_MPIBAIJ, 2604 MatDestroy_MPIBAIJ, 2605 MatView_MPIBAIJ, 2606 0, 2607 0, 2608 /*64*/ 0, 2609 0, 2610 0, 2611 0, 2612 0, 2613 /*69*/ MatGetRowMaxAbs_MPIBAIJ, 2614 0, 2615 0, 2616 0, 2617 0, 2618 /*74*/ 0, 2619 MatFDColoringApply_BAIJ, 2620 0, 2621 0, 2622 0, 2623 /*79*/ 0, 2624 0, 2625 0, 2626 0, 2627 MatLoad_MPIBAIJ, 2628 /*84*/ 0, 2629 0, 2630 0, 2631 0, 2632 0, 2633 /*89*/ 0, 2634 0, 2635 0, 2636 0, 2637 0, 2638 /*94*/ 0, 2639 0, 2640 0, 2641 0, 2642 0, 2643 /*99*/ 0, 2644 0, 2645 0, 2646 0, 2647 0, 2648 /*104*/0, 2649 MatRealPart_MPIBAIJ, 2650 MatImaginaryPart_MPIBAIJ, 2651 0, 2652 0, 2653 /*109*/0, 2654 0, 2655 0, 2656 0, 2657 0, 2658 /*114*/MatGetSeqNonzeroStructure_MPIBAIJ, 2659 0, 2660 MatGetGhosts_MPIBAIJ, 2661 0, 2662 0, 2663 /*119*/0, 2664 0, 2665 0, 2666 0, 2667 MatGetMultiProcBlock_MPIBAIJ, 2668 /*124*/0, 2669 MatGetColumnNorms_MPIBAIJ, 2670 MatInvertBlockDiagonal_MPIBAIJ, 2671 0, 2672 0, 2673 /*129*/ 0, 2674 0, 2675 0, 2676 0, 2677 0, 2678 
/*134*/ 0, 2679 0, 2680 0, 2681 0, 2682 0, 2683 /*139*/ 0, 2684 0, 2685 0, 2686 MatFDColoringSetUp_MPIXAIJ 2687 }; 2688 2689 #undef __FUNCT__ 2690 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ" 2691 PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a) 2692 { 2693 PetscFunctionBegin; 2694 *a = ((Mat_MPIBAIJ*)A->data)->A; 2695 PetscFunctionReturn(0); 2696 } 2697 2698 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*); 2699 2700 #undef __FUNCT__ 2701 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ" 2702 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2703 { 2704 PetscInt m,rstart,cstart,cend; 2705 PetscInt i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0; 2706 const PetscInt *JJ =0; 2707 PetscScalar *values=0; 2708 PetscBool roworiented = ((Mat_MPIBAIJ*)B->data)->roworiented; 2709 PetscErrorCode ierr; 2710 2711 PetscFunctionBegin; 2712 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2713 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2714 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2715 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2716 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2717 m = B->rmap->n/bs; 2718 rstart = B->rmap->rstart/bs; 2719 cstart = B->cmap->rstart/bs; 2720 cend = B->cmap->rend/bs; 2721 2722 if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 2723 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 2724 for (i=0; i<m; i++) { 2725 nz = ii[i+1] - ii[i]; 2726 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 2727 nz_max = PetscMax(nz_max,nz); 2728 JJ = jj + ii[i]; 2729 for (j=0; j<nz; j++) { 2730 if (*JJ >= cstart) break; 2731 JJ++; 2732 } 2733 d = 0; 2734 for (; j<nz; j++) { 2735 if (*JJ++ >= cend) break; 2736 d++; 2737 } 2738 d_nnz[i] = d; 2739 o_nnz[i] = nz - d; 2740 } 2741 ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2742 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 2743 2744 values = (PetscScalar*)V; 2745 if (!values) { 2746 ierr = PetscMalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr); 2747 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2748 } 2749 for (i=0; i<m; i++) { 2750 PetscInt row = i + rstart; 2751 PetscInt ncols = ii[i+1] - ii[i]; 2752 const PetscInt *icols = jj + ii[i]; 2753 if (!roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */ 2754 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2755 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2756 } else { /* block ordering does not match so we can only insert one block at a time. */ 2757 PetscInt j; 2758 for (j=0; j<ncols; j++) { 2759 const PetscScalar *svals = values + (V ? 
(bs*bs*(ii[i]+j)) : 0); 2760 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr); 2761 } 2762 } 2763 } 2764 2765 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2766 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2767 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2768 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2769 PetscFunctionReturn(0); 2770 } 2771 2772 #undef __FUNCT__ 2773 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR" 2774 /*@C 2775 MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format 2776 (the default parallel PETSc format). 2777 2778 Collective on MPI_Comm 2779 2780 Input Parameters: 2781 + A - the matrix 2782 . bs - the block size 2783 . i - the indices into j for the start of each local row (starts with zero) 2784 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2785 - v - optional values in the matrix 2786 2787 Level: developer 2788 2789 Notes: The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED. For example, C programs 2790 may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is 2791 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 2792 MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 2793 block column and the second index is over columns within a block. 2794 2795 .keywords: matrix, aij, compressed row, sparse, parallel 2796 2797 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, MatCreateMPIBAIJWithArrays(), MPIBAIJ 2798 @*/ 2799 PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2800 { 2801 PetscErrorCode ierr; 2802 2803 PetscFunctionBegin; 2804 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2805 PetscValidType(B,1); 2806 PetscValidLogicalCollectiveInt(B,bs,2); 2807 ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 2808 PetscFunctionReturn(0); 2809 } 2810 2811 #undef __FUNCT__ 2812 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ" 2813 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz) 2814 { 2815 Mat_MPIBAIJ *b; 2816 PetscErrorCode ierr; 2817 PetscInt i; 2818 2819 PetscFunctionBegin; 2820 ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 2821 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2822 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2823 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2824 2825 if (d_nnz) { 2826 for (i=0; i<B->rmap->n/bs; i++) { 2827 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 2828 } 2829 } 2830 if (o_nnz) { 2831 for (i=0; i<B->rmap->n/bs; i++) { 2832 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 2833 } 2834 } 2835 2836 b = (Mat_MPIBAIJ*)B->data; 2837 b->bs2 = bs*bs; 2838 b->mbs = B->rmap->n/bs; 2839 b->nbs = B->cmap->n/bs; 2840 b->Mbs = B->rmap->N/bs; 2841 b->Nbs = B->cmap->N/bs; 2842 2843 for (i=0; i<=b->size; i++) { 2844 
b->rangebs[i] = B->rmap->range[i]/bs; 2845 } 2846 b->rstartbs = B->rmap->rstart/bs; 2847 b->rendbs = B->rmap->rend/bs; 2848 b->cstartbs = B->cmap->rstart/bs; 2849 b->cendbs = B->cmap->rend/bs; 2850 2851 if (!B->preallocated) { 2852 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2853 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2854 ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr); 2855 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2856 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2857 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2858 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 2859 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2860 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr); 2861 } 2862 2863 ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2864 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 2865 B->preallocated = PETSC_TRUE; 2866 PetscFunctionReturn(0); 2867 } 2868 2869 extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 2870 extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 2871 2872 #undef __FUNCT__ 2873 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj" 2874 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj) 2875 { 2876 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 2877 PetscErrorCode ierr; 2878 Mat_SeqBAIJ *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data; 2879 PetscInt M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs; 2880 const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray; 2881 2882 PetscFunctionBegin; 2883 ierr = PetscMalloc1((M+1),&ii);CHKERRQ(ierr); 2884 ii[0] = 0; 2885 for (i=0; i<M; i++) { 2886 if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]); 2887 if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]); 2888 ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i]; 2889 /* remove one from count of matrix has diagonal */ 2890 for (j=id[i]; j<id[i+1]; j++) { 2891 if (jd[j] == i) {ii[i+1]--;break;} 2892 } 2893 } 2894 ierr = PetscMalloc1(ii[M],&jj);CHKERRQ(ierr); 2895 cnt = 0; 2896 for (i=0; i<M; i++) { 2897 for (j=io[i]; j<io[i+1]; j++) { 2898 if (garray[jo[j]] > rstart) break; 2899 jj[cnt++] = garray[jo[j]]; 2900 } 2901 for (k=id[i]; k<id[i+1]; k++) { 2902 if (jd[k] != i) { 2903 jj[cnt++] = rstart + jd[k]; 2904 } 2905 } 2906 for (; j<io[i+1]; j++) { 2907 jj[cnt++] = garray[jo[j]]; 2908 } 2909 } 2910 ierr = MatCreateMPIAdj(PetscObjectComm((PetscObject)B),M,B->cmap->N/B->rmap->bs,ii,jj,NULL,adj);CHKERRQ(ierr); 2911 PetscFunctionReturn(0); 2912 } 2913 2914 #include <../src/mat/impls/aij/mpi/mpiaij.h> 2915 2916 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*); 2917 2918 #undef __FUNCT__ 2919 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAIJ" 2920 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat) 2921 { 2922 PetscErrorCode ierr; 2923 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2924 Mat B; 2925 Mat_MPIAIJ *b; 2926 2927 PetscFunctionBegin; 2928 if (!A->assembled) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Matrix must be assembled"); 2929 2930 ierr = 
MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2931 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2932 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2933 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 2934 ierr = MatMPIAIJSetPreallocation(B,0,NULL,0,NULL);CHKERRQ(ierr); 2935 b = (Mat_MPIAIJ*) B->data; 2936 2937 ierr = MatDestroy(&b->A);CHKERRQ(ierr); 2938 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2939 ierr = MatDisAssemble_MPIBAIJ(A);CHKERRQ(ierr); 2940 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr); 2941 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr); 2942 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2943 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2944 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2945 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2946 if (reuse == MAT_REUSE_MATRIX) { 2947 ierr = MatHeaderReplace(A,B);CHKERRQ(ierr); 2948 } else { 2949 *newmat = B; 2950 } 2951 PetscFunctionReturn(0); 2952 } 2953 2954 #if defined(PETSC_HAVE_MUMPS) 2955 PETSC_EXTERN PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*); 2956 #endif 2957 2958 /*MC 2959 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 2960 2961 Options Database Keys: 2962 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 2963 . -mat_block_size <bs> - set the blocksize used to store the matrix 2964 - -mat_use_hash_table <fact> 2965 2966 Level: beginner 2967 2968 .seealso: MatCreateMPIBAIJ 2969 M*/ 2970 2971 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*); 2972 2973 #undef __FUNCT__ 2974 #define __FUNCT__ "MatCreate_MPIBAIJ" 2975 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B) 2976 { 2977 Mat_MPIBAIJ *b; 2978 PetscErrorCode ierr; 2979 PetscBool flg; 2980 2981 PetscFunctionBegin; 2982 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 2983 B->data = (void*)b; 2984 2985 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2986 B->assembled = PETSC_FALSE; 2987 2988 B->insertmode = NOT_SET_VALUES; 2989 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 2990 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr); 2991 2992 /* build local table of row and column ownerships */ 2993 ierr = PetscMalloc1((b->size+1),&b->rangebs);CHKERRQ(ierr); 2994 2995 /* build cache for off array entries formed */ 2996 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 2997 2998 b->donotstash = PETSC_FALSE; 2999 b->colmap = NULL; 3000 b->garray = NULL; 3001 b->roworiented = PETSC_TRUE; 3002 3003 /* stuff used in block assembly */ 3004 b->barray = 0; 3005 3006 /* stuff used for matrix vector multiply */ 3007 b->lvec = 0; 3008 b->Mvctx = 0; 3009 3010 /* stuff for MatGetRow() */ 3011 b->rowindices = 0; 3012 b->rowvalues = 0; 3013 b->getrowactive = PETSC_FALSE; 3014 3015 /* hash table stuff */ 3016 b->ht = 0; 3017 b->hd = 0; 3018 b->ht_size = 0; 3019 b->ht_flag = PETSC_FALSE; 3020 b->ht_fact = 0; 3021 b->ht_total_ct = 0; 3022 b->ht_insert_ct = 0; 3023 3024 /* stuff for MatGetSubMatrices_MPIBAIJ_local() */ 3025 b->ijonly = PETSC_FALSE; 3026 3027 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr); 3028 ierr = 
PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,NULL);CHKERRQ(ierr); 3029 if (flg) { 3030 PetscReal fact = 1.39; 3031 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 3032 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr); 3033 if (fact <= 1.0) fact = 1.39; 3034 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 3035 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 3036 } 3037 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3038 3039 #if defined(PETSC_HAVE_MUMPS) 3040 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_baij_mumps);CHKERRQ(ierr); 3041 #endif 3042 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr); 3043 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr); 3044 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr); 3045 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 3046 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 3047 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 3048 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr); 3049 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr); 3050 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr); 3051 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSetHashTableFactor_C",MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr); 3052 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpibstrm_C",MatConvert_MPIBAIJ_MPIBSTRM);CHKERRQ(ierr); 3053 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr); 3054 PetscFunctionReturn(0); 3055 } 3056 3057 /*MC 3058 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 3059 3060 This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator, 3061 and MATMPIBAIJ otherwise. 3062 3063 Options Database Keys: 3064 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions() 3065 3066 Level: beginner 3067 3068 .seealso: MatCreateBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3069 M*/ 3070 3071 #undef __FUNCT__ 3072 #define __FUNCT__ "MatMPIBAIJSetPreallocation" 3073 /*@C 3074 MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format 3075 (block compressed row). For good matrix assembly performance 3076 the user should preallocate the matrix storage by setting the parameters 3077 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3078 performance can be increased by more than a factor of 50. 3079 3080 Collective on Mat 3081 3082 Input Parameters: 3083 + A - the matrix 3084 . bs - size of block 3085 . d_nz - number of block nonzeros per block row in diagonal portion of local 3086 submatrix (same for all local rows) 3087 . 
#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJSetPreallocation"
/*@C
   MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
   (block compressed row).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on Mat

   Input Parameters:
+  A - the matrix
.  bs    - size of block
.  d_nz  - number of block nonzeros per block row in the diagonal portion of the local
           submatrix (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           of the diagonal portion of the local submatrix (possibly different for each block
           row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry and
           set it even if it is zero.
.  o_nz  - number of block nonzeros per block row in the off-diagonal portion of the local
           submatrix (same for all local rows).
-  o_nnz - array containing the number of block nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or NULL.

   If the *_nnz parameter is given then the *_nz parameter is ignored

   Options Database Keys:
+   -mat_block_size - size of the blocks to use
-   -mat_use_hash_table <fact> - use a hash table to save memory in constructing the matrix

   Notes:
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          --------------------------
   row 3  |o o o d d d o o o o  o  o
   row 4  |o o o d d d o o o o  o  o
   row 5  |o o o d d d o o o o  o  o
          --------------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
   stored simply in the MATSEQBAIJ format for compressed row storage.

   Now d_nz should indicate the number of block nonzeros per row in the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.  For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

   You can call MatGetInfo() to get information on how effective the preallocation was;
   for example the fields mallocs, nz_allocated, nz_used, nz_unneeded.
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.

   Level: intermediate

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR(), PetscSplitOwnership()
@*/
PetscErrorCode  MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  PetscValidLogicalCollectiveInt(B,bs,2);
  ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
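/*
   A minimal preallocation sketch for the routine above; the block size and the
   per-block-row counts here (2, 5 and 1) are illustrative assumptions, not values
   taken from this file:

      ierr = MatCreate(comm,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
      ierr = MatMPIBAIJSetPreallocation(A,2,5,NULL,1,NULL);CHKERRQ(ierr);

   Per-block-row counts can be supplied instead by passing d_nnz/o_nnz arrays of
   length mbs (the number of local block rows) in place of NULL.
*/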
#undef __FUNCT__
#define __FUNCT__ "MatCreateBAIJ"
/*@C
   MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format
   (block compressed row).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs   - size of block
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
           This value should be the same as the local size used in creating the
           y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
           This value should be the same as the local size used in creating the
           x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz  - number of nonzero blocks per block row in the diagonal portion of the local
           submatrix (same for all local rows)
.  d_nnz - array containing the number of nonzero blocks in the various block rows
           of the diagonal portion of the local submatrix (possibly different for each block
           row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
           and set it even if it is zero.
.  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of the local
           submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzero blocks in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or NULL.

   Output Parameter:
.  A - the matrix

   Options Database Keys:
+   -mat_block_size - size of the blocks to use
-   -mat_use_hash_table <fact> - use a hash table to save memory in constructing the matrix

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored

   A nonzero block is any block that has 1 or more nonzeros in it

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          --------------------------
   row 3  |o o o d d d o o o o  o  o
   row 4  |o o o d d d o o o o  o  o
   row 5  |o o o d d d o o o o  o  o
          --------------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
   stored simply in the MATSEQBAIJ format for compressed row storage.

   Now d_nz should indicate the number of block nonzeros per row in the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.  For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

   Level: intermediate

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
@*/
PetscErrorCode  MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
    ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
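/*
   The one-call equivalent of the MatCreate()/MatSetType()/MatMPIBAIJSetPreallocation()
   sequence sketched earlier; the block size and nonzero estimates are again
   illustrative assumptions:

      Mat A;
      ierr = MatCreateBAIJ(comm,2,PETSC_DECIDE,PETSC_DECIDE,M,M,5,NULL,1,NULL,&A);CHKERRQ(ierr);
*/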
#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIBAIJ"
static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
  PetscErrorCode ierr;
  PetscInt       len=0;

  PetscFunctionBegin;
  *newmat = 0;
  ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
  ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
  ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
  ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->factortype   = matin->factortype;
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;

  a             = (Mat_MPIBAIJ*)mat->data;
  mat->rmap->bs = matin->rmap->bs;
  a->bs2        = oldmat->bs2;
  a->mbs        = oldmat->mbs;
  a->nbs        = oldmat->nbs;
  a->Mbs        = oldmat->Mbs;
  a->Nbs        = oldmat->Nbs;

  ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;
  a->barray       = 0;
  a->rstartbs     = oldmat->rstartbs;
  a->rendbs       = oldmat->rendbs;
  a->cstartbs     = oldmat->cstartbs;
  a->cendbs       = oldmat->cendbs;

  /* hash table stuff */
  a->ht           = 0;
  a->hd           = 0;
  a->ht_size      = 0;
  a->ht_flag      = oldmat->ht_flag;
  a->ht_fact      = oldmat->ht_fact;
  a->ht_total_ct  = 0;
  a->ht_insert_ct = 0;

  ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc1((a->Nbs),&a->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;

  if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
    ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
  } else a->garray = 0;

  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);

  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
  ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);

  *newmat = mat;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIBAIJ"
PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  int            fd;
  PetscInt       i,nz,j,rstart,rend;
  PetscScalar    *vals,*buf;
  MPI_Comm       comm;
  MPI_Status     status;
  PetscMPIInt    rank,size,maxnz;
  PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
  PetscInt       *locrowlens = NULL,*procsnz = NULL,*browners = NULL;
  PetscInt       jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax;
  PetscMPIInt    tag = ((PetscObject)viewer)->tag;
  PetscInt       *dlens = NULL,*odlens = NULL,*mask = NULL,*masked1 = NULL,*masked2 = NULL,rowcount,odcount;
  PetscInt       dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols;
  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
  ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsEnd();CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
  }

  if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;

  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M    = header[1]; N = header[2];

  /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
  if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
  if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;

  /* If global sizes are set, check if they are consistent with that given in the file */
  if (sizesset) {
    ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
  }
  if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
  if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);

  if (M != N) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Can only do square matrices");

  /*
     This code adds extra rows to make sure the number of rows is
     divisible by the blocksize
  */
  Mbs        = M/bs;
  extra_rows = bs - M + bs*Mbs;
  if (extra_rows == bs) extra_rows = 0;
  else                  Mbs++;
  if (extra_rows && !rank) {
    ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
  }

  /* determine ownership of all rows */
  if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
    mbs = Mbs/size + ((Mbs % size) > rank);
    m   = mbs*bs;
  } else { /* User set */
    m   = newmat->rmap->n;
    mbs = m/bs;
  }
  ierr = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

  /* process 0 needs enough room for process with most rows */
  if (!rank) {
    mmax = rowners[1];
    for (i=2; i<=size; i++) {
      mmax = PetscMax(mmax,rowners[i]);
    }
    mmax *= bs;
  } else mmax = -1; /* unused, but compiler warns anyway */

  rowners[0] = 0;
  for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
  for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr = PetscMalloc1(m,&locrowlens);CHKERRQ(ierr);
  if (!rank) {
    mend = m;
    if (size == 1) mend = mend - extra_rows;
    ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
    for (j=mend; j<m; j++) locrowlens[j] = 1;
    ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
    ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
    for (j=0; j<m; j++) {
      procsnz[0] += locrowlens[j];
    }
    for (i=1; i<size; i++) {
      mend = browners[i+1] - browners[i];
      if (i == size-1) mend = mend - extra_rows;
      ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
      for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
      /* calculate the number of nonzeros on each processor */
      for (j=0; j<browners[i+1]-browners[i]; j++) {
        procsnz[i] += rowlengths[j];
      }
      ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);
  } else {
    ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
  }

  if (!rank) {
    /* determine max buffer needed and allocate it */
    maxnz = procsnz[0];
    for (i=1; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz     = procsnz[0];
    ierr   = PetscMalloc1((nz+1),&ibuf);CHKERRQ(ierr);
    mycols = ibuf;
    if (size == 1) nz -= extra_rows;
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
    if (size == 1) {
      for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i;
    }

    /* read in the parts for all other processes (except the last one) and ship them off */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    /* read in the stuff for the last proc */
    if (size != 1) {
      nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
      ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += locrowlens[i];
    }
    ierr   = PetscMalloc1((nz+1),&ibuf);CHKERRQ(ierr);
    mycols = ibuf;
    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries */
  ierr     = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr);
  ierr     = PetscCalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr);
  rowcount = 0; nzcount = 0;
  for (i=0; i<mbs; i++) {
    dcount  = 0;
    odcount = 0;
    for (j=0; j<bs; j++) {
      kmax = locrowlens[rowcount];
      for (k=0; k<kmax; k++) {
        tmp = mycols[nzcount++]/bs;
        if (!mask[tmp]) {
          mask[tmp] = 1;
          if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
          else masked1[dcount++] = tmp;
        }
      }
      rowcount++;
    }

    dlens[i]  = dcount;
    odlens[i] = odcount;

    /* zero out the mask elements we set */
    for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
    for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
  }

  if (!sizesset) {
    ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
  }
  ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);

  if (!rank) {
    ierr = PetscMalloc1((maxnz+1),&buf);CHKERRQ(ierr);
    /* read in my part of the matrix numerical values */
    nz     = procsnz[0];
    vals   = buf;
    mycols = ibuf;
    if (size == 1) nz -= extra_rows;
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
    if (size == 1) {
      for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0;
    }

    /* insert into matrix */
    jj = rstart*bs;
    for (i=0; i<m; i++) {
      ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
    /* read in other processors (except the last one) and ship out */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
    }
    /* the last proc */
    if (size != 1) {
      nz   = procsnz[i] - extra_rows;
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
      ierr = MPIULong_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc1((nz+1),&buf);CHKERRQ(ierr);

    /* receive message of values */
    vals   = buf;
    mycols = ibuf;
    ierr   = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);

    /* insert into matrix */
    jj = rstart*bs;
    for (i=0; i<m; i++) {
      ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
  }
  ierr = PetscFree(locrowlens);CHKERRQ(ierr);
  ierr = PetscFree(buf);CHKERRQ(ierr);
  ierr = PetscFree(ibuf);CHKERRQ(ierr);
  ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
  ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
  ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
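/*
   A minimal sketch of driving the loader above through the public interface; the
   file name is an illustrative assumption, and -matload_block_size selects the
   block size at load time:

      Mat         A;
      PetscViewer viewer;
      ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
      ierr = MatLoad(A,viewer);CHKERRQ(ierr);
      ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/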
#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
/*@
   MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.

   Not Collective, each process can use a different factor

   Input Parameters:
+  mat  - the matrix
-  fact - factor

   Level: advanced

   Notes:
   This can also be set by the command line option: -mat_use_hash_table <fact>

.keywords: matrix, hashtable, factor, HT

.seealso: MatSetOption()
@*/
PetscErrorCode  MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
{
  Mat_MPIBAIJ *baij;

  PetscFunctionBegin;
  baij          = (Mat_MPIBAIJ*)mat->data;
  baij->ht_fact = fact;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
PetscErrorCode  MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;

  PetscFunctionBegin;
  *Ad     = a->A;
  *Ao     = a->B;
  *colmap = a->garray;
  PetscFunctionReturn(0);
}
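/*
   A small access sketch for MatMPIBAIJGetSeqBAIJ() above (variable names are
   illustrative): retrieve the local diagonal and off-diagonal SeqBAIJ blocks and
   the map from off-diagonal block columns to global block columns.

      Mat            Ad,Ao;
      const PetscInt *garray;
      ierr = MatMPIBAIJGetSeqBAIJ(A,&Ad,&Ao,&garray);CHKERRQ(ierr);
*/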
/*
    Special version for direct calls from Fortran (to eliminate two function call overheads)
*/
#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
#endif

#undef __FUNCT__
#define __FUNCT__ "matmpibaijsetvaluesblocked"
/*@C
  MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()

  Collective on Mat

  Input Parameters:
+ mat    - the matrix
. min    - number of input rows
. im     - input rows
. nin    - number of input columns
. in     - input columns
. v      - numerical values input
- addvin - INSERT_VALUES or ADD_VALUES

  Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse.

  Level: advanced

.seealso: MatSetValuesBlocked()
@*/
PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
{
  /* convert input arguments to C version */
  Mat        mat  = *matin;
  PetscInt   m    = *min, n = *nin;
  InsertMode addv = *addvin;

  Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray      = baij->barray;
  PetscBool       roworiented  = baij->roworiented;
  PetscErrorCode  ierr;
  PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  /* tasks normally handled by MatSetValuesBlocked() */
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
  if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
#endif
  if (mat->assembled) {
    mat->was_assembled = PETSC_TRUE;
    mat->assembled     = PETSC_FALSE;
  }
  ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);

  if (!barray) {
    ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) stepval = (n-1)*bs;
  else             stepval = (m-1)*bs;

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++ = *value++;
            }
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined(PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            }
          } else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }

  /* task normally handled by MatSetValuesBlocked() */
  ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIBAIJWithArrays"
/*@
   MatCreateMPIBAIJWithArrays - creates a MPI BAIJ matrix using arrays that contain the
   local rows in standard CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs - the block size, only a block size of 1 is supported
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
   The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
   thus you CANNOT change the matrix entries by changing the values of a[] after you have
   called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

   The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
   the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
   block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
   with column-major ordering within blocks.

   The i and j indices are 0 based, and i indices are indices corresponding to the local j array.

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode  MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
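/*
   A minimal calling sketch for the routine above; the CSR triplet is an illustrative
   assumption (two local rows with three nonzeros, bs = 1, run on two processes so the
   global matrix is 4 x 4):

      PetscInt    ia[] = {0,2,3};
      PetscInt    ja[] = {0,3,1};
      PetscScalar va[] = {1.0,2.0,3.0};
      Mat         A;
      ierr = MatCreateMPIBAIJWithArrays(PETSC_COMM_WORLD,1,2,2,PETSC_DETERMINE,PETSC_DETERMINE,ia,ja,va,&A);CHKERRQ(ierr);
*/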