1 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/
2
3 #include <petsc/private/hashseti.h>
4 #include <petscblaslapack.h>
5 #include <petscsf.h>
6
7 #if defined(PETSC_HAVE_HYPRE)
8 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
9 #endif
10
11 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
12 {
13   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
14   PetscInt i,*idxb = NULL,m = A->rmap->n,bs = A->cmap->bs;
15   PetscScalar *va,*vv;
16   Vec vB,vA;
17   const PetscScalar *vb;
18
19   PetscFunctionBegin;
20   PetscCall(VecCreateSeq(PETSC_COMM_SELF,m,&vA));
21   PetscCall(MatGetRowMaxAbs(a->A,vA,idx));
22
23   PetscCall(VecGetArrayWrite(vA,&va));
24   if (idx) {
25     for (i=0; i<m; i++) {
26       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
27     }
28   }
29
30   PetscCall(VecCreateSeq(PETSC_COMM_SELF,m,&vB));
31   PetscCall(PetscMalloc1(m,&idxb));
32   PetscCall(MatGetRowMaxAbs(a->B,vB,idxb));
33
34   PetscCall(VecGetArrayWrite(v,&vv));
35   PetscCall(VecGetArrayRead(vB,&vb));
36   for (i=0; i<m; i++) {
37     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
38       vv[i] = vb[i];
39       if (idx) idx[i] = bs*a->garray[idxb[i]/bs] + (idxb[i] % bs);
40     } else {
41       vv[i] = va[i];
42       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > bs*a->garray[idxb[i]/bs] + (idxb[i] % bs))
43         idx[i] = bs*a->garray[idxb[i]/bs] + (idxb[i] % bs);
44     }
45   }
46   PetscCall(VecRestoreArrayWrite(v,&vv));
47   PetscCall(VecRestoreArrayWrite(vA,&va));
48   PetscCall(VecRestoreArrayRead(vB,&vb));
49   PetscCall(PetscFree(idxb));
50   PetscCall(VecDestroy(&vA));
51   PetscCall(VecDestroy(&vB));
52   PetscFunctionReturn(0);
53 }
54
55 PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat)
56 {
57   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)mat->data;
58
59   PetscFunctionBegin;
60   PetscCall(MatStoreValues(aij->A));
61   PetscCall(MatStoreValues(aij->B));
62   PetscFunctionReturn(0);
63 }
64
65 PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat)
66 {
67   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)mat->data;
68
69   PetscFunctionBegin;
70   PetscCall(MatRetrieveValues(aij->A));
71   PetscCall(MatRetrieveValues(aij->B));
72   PetscFunctionReturn(0);
73 }
74
75 /*
76   Local utility routine that creates a mapping from the global column
77   number to the local number in the off-diagonal part of the local
78   storage of the matrix. This is done in a non-scalable way since the
79   length of colmap equals the global matrix length.
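   For illustration only (example values, not taken from this file): in the
   array (non-PETSC_USE_CTABLE) branch below, with bs = 2 and garray = {3,7},
   the routine sets colmap[3] = 0*bs+1 = 1 and colmap[7] = 1*bs+1 = 3; every
   stored value is shifted by +1 so that 0 can mean "this global block column
   is not present locally".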
80 */ 81 PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat) 82 { 83 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 84 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 85 PetscInt nbs = B->nbs,i,bs=mat->rmap->bs; 86 87 PetscFunctionBegin; 88 #if defined(PETSC_USE_CTABLE) 89 PetscCall(PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap)); 90 for (i=0; i<nbs; i++) { 91 PetscCall(PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES)); 92 } 93 #else 94 PetscCall(PetscCalloc1(baij->Nbs+1,&baij->colmap)); 95 PetscCall(PetscLogObjectMemory((PetscObject)mat,baij->Nbs*sizeof(PetscInt))); 96 for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1; 97 #endif 98 PetscFunctionReturn(0); 99 } 100 101 #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,orow,ocol) \ 102 { \ 103 brow = row/bs; \ 104 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 105 rmax = aimax[brow]; nrow = ailen[brow]; \ 106 bcol = col/bs; \ 107 ridx = row % bs; cidx = col % bs; \ 108 low = 0; high = nrow; \ 109 while (high-low > 3) { \ 110 t = (low+high)/2; \ 111 if (rp[t] > bcol) high = t; \ 112 else low = t; \ 113 } \ 114 for (_i=low; _i<high; _i++) { \ 115 if (rp[_i] > bcol) break; \ 116 if (rp[_i] == bcol) { \ 117 bap = ap + bs2*_i + bs*cidx + ridx; \ 118 if (addv == ADD_VALUES) *bap += value; \ 119 else *bap = value; \ 120 goto a_noinsert; \ 121 } \ 122 } \ 123 if (a->nonew == 1) goto a_noinsert; \ 124 PetscCheck(a->nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 125 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 126 N = nrow++ - 1; \ 127 /* shift up all the later entries in this row */ \ 128 PetscCall(PetscArraymove(rp+_i+1,rp+_i,N-_i+1));\ 129 PetscCall(PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1))); \ 130 PetscCall(PetscArrayzero(ap+bs2*_i,bs2)); \ 131 rp[_i] = bcol; \ 132 ap[bs2*_i + bs*cidx + ridx] = value; \ 133 a_noinsert:; \ 134 ailen[brow] = nrow; \ 135 } 136 137 #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,orow,ocol) \ 138 { \ 139 brow = row/bs; \ 140 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 141 rmax = bimax[brow]; nrow = bilen[brow]; \ 142 bcol = col/bs; \ 143 ridx = row % bs; cidx = col % bs; \ 144 low = 0; high = nrow; \ 145 while (high-low > 3) { \ 146 t = (low+high)/2; \ 147 if (rp[t] > bcol) high = t; \ 148 else low = t; \ 149 } \ 150 for (_i=low; _i<high; _i++) { \ 151 if (rp[_i] > bcol) break; \ 152 if (rp[_i] == bcol) { \ 153 bap = ap + bs2*_i + bs*cidx + ridx; \ 154 if (addv == ADD_VALUES) *bap += value; \ 155 else *bap = value; \ 156 goto b_noinsert; \ 157 } \ 158 } \ 159 if (b->nonew == 1) goto b_noinsert; \ 160 PetscCheck(b->nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 161 MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 162 N = nrow++ - 1; \ 163 /* shift up all the later entries in this row */ \ 164 PetscCall(PetscArraymove(rp+_i+1,rp+_i,N-_i+1));\ 165 PetscCall(PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1)));\ 166 PetscCall(PetscArrayzero(ap+bs2*_i,bs2)); \ 167 rp[_i] = bcol; \ 168 ap[bs2*_i + bs*cidx + ridx] = value; \ 169 b_noinsert:; \ 170 bilen[brow] = nrow; \ 171 } 172 173 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar 
v[],InsertMode addv)
174 {
175   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
176   MatScalar value;
177   PetscBool roworiented = baij->roworiented;
178   PetscInt i,j,row,col;
179   PetscInt rstart_orig=mat->rmap->rstart;
180   PetscInt rend_orig =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
181   PetscInt cend_orig =mat->cmap->rend,bs=mat->rmap->bs;
182
183   /* Some Variables required in the macro */
184   Mat A = baij->A;
185   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data;
186   PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
187   MatScalar *aa =a->a;
188
189   Mat B = baij->B;
190   Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data;
191   PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
192   MatScalar *ba =b->a;
193
194   PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol;
195   PetscInt low,high,t,ridx,cidx,bs2=a->bs2;
196   MatScalar *ap,*bap;
197
198   PetscFunctionBegin;
199   for (i=0; i<m; i++) {
200     if (im[i] < 0) continue;
201     PetscCheck(im[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1);
202     if (im[i] >= rstart_orig && im[i] < rend_orig) {
203       row = im[i] - rstart_orig;
204       for (j=0; j<n; j++) {
205         if (in[j] >= cstart_orig && in[j] < cend_orig) {
206           col = in[j] - cstart_orig;
207           if (roworiented) value = v[i*n+j];
208           else value = v[i+j*m];
209           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,im[i],in[j]);
210         } else if (in[j] < 0) continue;
211         else {
212           PetscCheck(in[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[j],mat->cmap->N-1);
213           if (mat->was_assembled) {
214             if (!baij->colmap) {
215               PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
216             }
217 #if defined(PETSC_USE_CTABLE)
218             PetscCall(PetscTableFind(baij->colmap,in[j]/bs + 1,&col));
219             col = col - 1;
220 #else
221             col = baij->colmap[in[j]/bs] - 1;
222 #endif
223             if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
224               PetscCall(MatDisAssemble_MPIBAIJ(mat));
225               col = in[j];
226               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
227               B = baij->B;
228               b = (Mat_SeqBAIJ*)(B)->data;
229               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
230               ba =b->a;
231             } else {
                  PetscCheck(col >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
232               col += in[j]%bs;
                }
233           } else col = in[j];
234           if (roworiented) value = v[i*n+j];
235           else value = v[i+j*m];
236           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,im[i],in[j]);
237           /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */
238         }
239       }
240     } else {
241       PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
242       if (!baij->donotstash) {
243         mat->assembled = PETSC_FALSE;
244         if (roworiented) {
245           PetscCall(MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE));
246         } else {
247           PetscCall(MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE));
248         }
249       }
250     }
251   }
252   PetscFunctionReturn(0);
253 }
254
255 static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
256 {
257   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
258   PetscInt *rp,low,high,t,ii,jj,nrow,i,rmax,N;
259   PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen;
260   PetscInt *aj
=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs; 261 PetscBool roworiented=a->roworiented; 262 const PetscScalar *value = v; 263 MatScalar *ap,*aa = a->a,*bap; 264 265 PetscFunctionBegin; 266 rp = aj + ai[row]; 267 ap = aa + bs2*ai[row]; 268 rmax = imax[row]; 269 nrow = ailen[row]; 270 value = v; 271 low = 0; 272 high = nrow; 273 while (high-low > 7) { 274 t = (low+high)/2; 275 if (rp[t] > col) high = t; 276 else low = t; 277 } 278 for (i=low; i<high; i++) { 279 if (rp[i] > col) break; 280 if (rp[i] == col) { 281 bap = ap + bs2*i; 282 if (roworiented) { 283 if (is == ADD_VALUES) { 284 for (ii=0; ii<bs; ii++) { 285 for (jj=ii; jj<bs2; jj+=bs) { 286 bap[jj] += *value++; 287 } 288 } 289 } else { 290 for (ii=0; ii<bs; ii++) { 291 for (jj=ii; jj<bs2; jj+=bs) { 292 bap[jj] = *value++; 293 } 294 } 295 } 296 } else { 297 if (is == ADD_VALUES) { 298 for (ii=0; ii<bs; ii++,value+=bs) { 299 for (jj=0; jj<bs; jj++) { 300 bap[jj] += value[jj]; 301 } 302 bap += bs; 303 } 304 } else { 305 for (ii=0; ii<bs; ii++,value+=bs) { 306 for (jj=0; jj<bs; jj++) { 307 bap[jj] = value[jj]; 308 } 309 bap += bs; 310 } 311 } 312 } 313 goto noinsert2; 314 } 315 } 316 if (nonew == 1) goto noinsert2; 317 PetscCheck(nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol); 318 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 319 N = nrow++ - 1; high++; 320 /* shift up all the later entries in this row */ 321 PetscCall(PetscArraymove(rp+i+1,rp+i,N-i+1)); 322 PetscCall(PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1))); 323 rp[i] = col; 324 bap = ap + bs2*i; 325 if (roworiented) { 326 for (ii=0; ii<bs; ii++) { 327 for (jj=ii; jj<bs2; jj+=bs) { 328 bap[jj] = *value++; 329 } 330 } 331 } else { 332 for (ii=0; ii<bs; ii++) { 333 for (jj=0; jj<bs; jj++) { 334 *bap++ = *value++; 335 } 336 } 337 } 338 noinsert2:; 339 ailen[row] = nrow; 340 PetscFunctionReturn(0); 341 } 342 343 /* 344 This routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed 345 by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine 346 */ 347 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 348 { 349 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 350 const PetscScalar *value; 351 MatScalar *barray = baij->barray; 352 PetscBool roworiented = baij->roworiented; 353 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 354 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 355 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 356 357 PetscFunctionBegin; 358 if (!barray) { 359 PetscCall(PetscMalloc1(bs2,&barray)); 360 baij->barray = barray; 361 } 362 363 if (roworiented) stepval = (n-1)*bs; 364 else stepval = (m-1)*bs; 365 366 for (i=0; i<m; i++) { 367 if (im[i] < 0) continue; 368 PetscCheck(im[i] < baij->Mbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT,im[i],baij->Mbs-1); 369 if (im[i] >= rstart && im[i] < rend) { 370 row = im[i] - rstart; 371 for (j=0; j<n; j++) { 372 /* If NumCol = 1 then a copy is not required */ 373 if ((roworiented) && (n == 1)) { 374 barray = (MatScalar*)v + i*bs2; 375 } else if ((!roworiented) && (m == 1)) { 376 barray = (MatScalar*)v + j*bs2; 377 } else { /* Here a copy is required */ 378 if (roworiented) { 379 value = v + 
(i*(stepval+bs) + j)*bs;
380           } else {
381             value = v + (j*(stepval+bs) + i)*bs;
382           }
383           for (ii=0; ii<bs; ii++,value+=bs+stepval) {
384             for (jj=0; jj<bs; jj++) barray[jj] = value[jj];
385             barray += bs;
386           }
387           barray -= bs2;
388         }
389
390         if (in[j] >= cstart && in[j] < cend) {
391           col = in[j] - cstart;
392           PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]));
393         } else if (in[j] < 0) continue;
394         else {
395           PetscCheck(in[j] < baij->Nbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT,in[j],baij->Nbs-1);
396           if (mat->was_assembled) {
397             if (!baij->colmap) {
398               PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
399             }
400
401 #if defined(PETSC_USE_DEBUG)
402 #if defined(PETSC_USE_CTABLE)
403             { PetscInt data;
404               PetscCall(PetscTableFind(baij->colmap,in[j]+1,&data));
405               PetscCheck((data - 1) % bs == 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
406             }
407 #else
408             PetscCheck((baij->colmap[in[j]] - 1) % bs == 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
409 #endif
410 #endif
411 #if defined(PETSC_USE_CTABLE)
412             PetscCall(PetscTableFind(baij->colmap,in[j]+1,&col));
413             col = (col - 1)/bs;
414 #else
415             col = (baij->colmap[in[j]] - 1)/bs;
416 #endif
417             if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
418               PetscCall(MatDisAssemble_MPIBAIJ(mat));
419               col = in[j];
420             } else PetscCheck(col >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix",im[i],in[j]);
421           } else col = in[j];
422           PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]));
423         }
424       }
425     } else {
426       PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
427       if (!baij->donotstash) {
428         if (roworiented) {
429           PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i));
430         } else {
431           PetscCall(MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i));
432         }
433       }
434     }
435   }
436   PetscFunctionReturn(0);
437 }
438
439 #define HASH_KEY 0.6180339887
440 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
441 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
442 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
443 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
444 {
445   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
446   PetscBool roworiented = baij->roworiented;
447   PetscInt i,j,row,col;
448   PetscInt rstart_orig=mat->rmap->rstart;
449   PetscInt rend_orig =mat->rmap->rend,Nbs=baij->Nbs;
450   PetscInt h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
451   PetscReal tmp;
452   MatScalar **HD = baij->hd,value;
453   PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
454
455   PetscFunctionBegin;
456   for (i=0; i<m; i++) {
457     if (PetscDefined(USE_DEBUG)) {
458       PetscCheck(im[i] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
459       PetscCheck(im[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1);
460     }
461     row = im[i];
462     if (row >= rstart_orig && row < rend_orig) {
463       for (j=0; j<n; j++) {
464         col = in[j];
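        /* Note on the lookup below: the HASH macro defined above implements
           multiplicative hashing (fractional part of key*0.618...) to choose a
           starting slot h1; the search then probes linearly towards the end of
           the table and wraps once to the beginning.  The key
           (row/bs)*Nbs + (col/bs) + 1 identifies the bs x bs block containing
           this entry, with the +1 reserving 0 to mean "empty slot". */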
465 if (roworiented) value = v[i*n+j]; 466 else value = v[i+j*m]; 467 /* Look up PetscInto the Hash Table */ 468 key = (row/bs)*Nbs+(col/bs)+1; 469 h1 = HASH(size,key,tmp); 470 471 idx = h1; 472 if (PetscDefined(USE_DEBUG)) { 473 insert_ct++; 474 total_ct++; 475 if (HT[idx] != key) { 476 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ; 477 if (idx == size) { 478 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ; 479 PetscCheck(idx != h1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col); 480 } 481 } 482 } else if (HT[idx] != key) { 483 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ; 484 if (idx == size) { 485 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ; 486 PetscCheck(idx != h1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col); 487 } 488 } 489 /* A HASH table entry is found, so insert the values at the correct address */ 490 if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 491 else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 492 } 493 } else if (!baij->donotstash) { 494 if (roworiented) { 495 PetscCall(MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE)); 496 } else { 497 PetscCall(MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE)); 498 } 499 } 500 } 501 if (PetscDefined(USE_DEBUG)) { 502 baij->ht_total_ct += total_ct; 503 baij->ht_insert_ct += insert_ct; 504 } 505 PetscFunctionReturn(0); 506 } 507 508 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 509 { 510 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 511 PetscBool roworiented = baij->roworiented; 512 PetscInt i,j,ii,jj,row,col; 513 PetscInt rstart=baij->rstartbs; 514 PetscInt rend =mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2; 515 PetscInt h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 516 PetscReal tmp; 517 MatScalar **HD = baij->hd,*baij_a; 518 const PetscScalar *v_t,*value; 519 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 520 521 PetscFunctionBegin; 522 if (roworiented) stepval = (n-1)*bs; 523 else stepval = (m-1)*bs; 524 525 for (i=0; i<m; i++) { 526 if (PetscDefined(USE_DEBUG)) { 527 PetscCheck(im[i] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %" PetscInt_FMT,im[i]); 528 PetscCheck(im[i] < baij->Mbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],baij->Mbs-1); 529 } 530 row = im[i]; 531 v_t = v + i*nbs2; 532 if (row >= rstart && row < rend) { 533 for (j=0; j<n; j++) { 534 col = in[j]; 535 536 /* Look up into the Hash Table */ 537 key = row*Nbs+col+1; 538 h1 = HASH(size,key,tmp); 539 540 idx = h1; 541 if (PetscDefined(USE_DEBUG)) { 542 total_ct++; 543 insert_ct++; 544 if (HT[idx] != key) { 545 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ; 546 if (idx == size) { 547 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ; 548 PetscCheck(idx != h1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col); 549 } 550 } 551 } else if (HT[idx] != key) { 552 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ; 553 if (idx == size) { 554 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ; 555 PetscCheck(idx != h1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%" PetscInt_FMT ",%" PetscInt_FMT ") has no 
entry in the hash table", row, col); 556 } 557 } 558 baij_a = HD[idx]; 559 if (roworiented) { 560 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 561 /* value = v + (i*(stepval+bs)+j)*bs; */ 562 value = v_t; 563 v_t += bs; 564 if (addv == ADD_VALUES) { 565 for (ii=0; ii<bs; ii++,value+=stepval) { 566 for (jj=ii; jj<bs2; jj+=bs) { 567 baij_a[jj] += *value++; 568 } 569 } 570 } else { 571 for (ii=0; ii<bs; ii++,value+=stepval) { 572 for (jj=ii; jj<bs2; jj+=bs) { 573 baij_a[jj] = *value++; 574 } 575 } 576 } 577 } else { 578 value = v + j*(stepval+bs)*bs + i*bs; 579 if (addv == ADD_VALUES) { 580 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 581 for (jj=0; jj<bs; jj++) { 582 baij_a[jj] += *value++; 583 } 584 } 585 } else { 586 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 587 for (jj=0; jj<bs; jj++) { 588 baij_a[jj] = *value++; 589 } 590 } 591 } 592 } 593 } 594 } else { 595 if (!baij->donotstash) { 596 if (roworiented) { 597 PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i)); 598 } else { 599 PetscCall(MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i)); 600 } 601 } 602 } 603 } 604 if (PetscDefined(USE_DEBUG)) { 605 baij->ht_total_ct += total_ct; 606 baij->ht_insert_ct += insert_ct; 607 } 608 PetscFunctionReturn(0); 609 } 610 611 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 612 { 613 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 614 PetscInt bs = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend; 615 PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data; 616 617 PetscFunctionBegin; 618 for (i=0; i<m; i++) { 619 if (idxm[i] < 0) continue; /* negative row */ 620 PetscCheck(idxm[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,idxm[i],mat->rmap->N-1); 621 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 622 row = idxm[i] - bsrstart; 623 for (j=0; j<n; j++) { 624 if (idxn[j] < 0) continue; /* negative column */ 625 PetscCheck(idxn[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,idxn[j],mat->cmap->N-1); 626 if (idxn[j] >= bscstart && idxn[j] < bscend) { 627 col = idxn[j] - bscstart; 628 PetscCall(MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j)); 629 } else { 630 if (!baij->colmap) { 631 PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 632 } 633 #if defined(PETSC_USE_CTABLE) 634 PetscCall(PetscTableFind(baij->colmap,idxn[j]/bs+1,&data)); 635 data--; 636 #else 637 data = baij->colmap[idxn[j]/bs]-1; 638 #endif 639 if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 640 else { 641 col = data + idxn[j]%bs; 642 PetscCall(MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j)); 643 } 644 } 645 } 646 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 647 } 648 PetscFunctionReturn(0); 649 } 650 651 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm) 652 { 653 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 654 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data; 655 PetscInt i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col; 656 PetscReal sum = 0.0; 657 MatScalar *v; 658 659 PetscFunctionBegin; 660 if (baij->size == 1) { 661 PetscCall(MatNorm(baij->A,type,nrm)); 662 } else { 663 if (type == NORM_FROBENIUS) { 664 v = amat->a; 665 nz = amat->nz*bs2; 666 for (i=0; i<nz; i++) { 667 sum += 
PetscRealPart(PetscConj(*v)*(*v)); v++; 668 } 669 v = bmat->a; 670 nz = bmat->nz*bs2; 671 for (i=0; i<nz; i++) { 672 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 673 } 674 PetscCall(MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat))); 675 *nrm = PetscSqrtReal(*nrm); 676 } else if (type == NORM_1) { /* max column sum */ 677 PetscReal *tmp,*tmp2; 678 PetscInt *jj,*garray=baij->garray,cstart=baij->rstartbs; 679 PetscCall(PetscCalloc1(mat->cmap->N,&tmp)); 680 PetscCall(PetscMalloc1(mat->cmap->N,&tmp2)); 681 v = amat->a; jj = amat->j; 682 for (i=0; i<amat->nz; i++) { 683 for (j=0; j<bs; j++) { 684 col = bs*(cstart + *jj) + j; /* column index */ 685 for (row=0; row<bs; row++) { 686 tmp[col] += PetscAbsScalar(*v); v++; 687 } 688 } 689 jj++; 690 } 691 v = bmat->a; jj = bmat->j; 692 for (i=0; i<bmat->nz; i++) { 693 for (j=0; j<bs; j++) { 694 col = bs*garray[*jj] + j; 695 for (row=0; row<bs; row++) { 696 tmp[col] += PetscAbsScalar(*v); v++; 697 } 698 } 699 jj++; 700 } 701 PetscCall(MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat))); 702 *nrm = 0.0; 703 for (j=0; j<mat->cmap->N; j++) { 704 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 705 } 706 PetscCall(PetscFree(tmp)); 707 PetscCall(PetscFree(tmp2)); 708 } else if (type == NORM_INFINITY) { /* max row sum */ 709 PetscReal *sums; 710 PetscCall(PetscMalloc1(bs,&sums)); 711 sum = 0.0; 712 for (j=0; j<amat->mbs; j++) { 713 for (row=0; row<bs; row++) sums[row] = 0.0; 714 v = amat->a + bs2*amat->i[j]; 715 nz = amat->i[j+1]-amat->i[j]; 716 for (i=0; i<nz; i++) { 717 for (col=0; col<bs; col++) { 718 for (row=0; row<bs; row++) { 719 sums[row] += PetscAbsScalar(*v); v++; 720 } 721 } 722 } 723 v = bmat->a + bs2*bmat->i[j]; 724 nz = bmat->i[j+1]-bmat->i[j]; 725 for (i=0; i<nz; i++) { 726 for (col=0; col<bs; col++) { 727 for (row=0; row<bs; row++) { 728 sums[row] += PetscAbsScalar(*v); v++; 729 } 730 } 731 } 732 for (row=0; row<bs; row++) { 733 if (sums[row] > sum) sum = sums[row]; 734 } 735 } 736 PetscCall(MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat))); 737 PetscCall(PetscFree(sums)); 738 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for this norm yet"); 739 } 740 PetscFunctionReturn(0); 741 } 742 743 /* 744 Creates the hash table, and sets the table 745 This table is created only once. 746 If new entried need to be added to the matrix 747 then the hash table has to be destroyed and 748 recreated. 
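   The table is a simple open-addressing scheme: HT[] holds the key
   row*Nbs + col + 1 of each nonzero block (row and col are global block
   indices, so a key of 0 marks an empty slot) and HD[] holds the address of
   that block's bs*bs values inside a->a or b->a.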
749 */ 750 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor) 751 { 752 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 753 Mat A = baij->A,B=baij->B; 754 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)B->data; 755 PetscInt i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 756 PetscInt ht_size,bs2=baij->bs2,rstart=baij->rstartbs; 757 PetscInt cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs; 758 PetscInt *HT,key; 759 MatScalar **HD; 760 PetscReal tmp; 761 #if defined(PETSC_USE_INFO) 762 PetscInt ct=0,max=0; 763 #endif 764 765 PetscFunctionBegin; 766 if (baij->ht) PetscFunctionReturn(0); 767 768 baij->ht_size = (PetscInt)(factor*nz); 769 ht_size = baij->ht_size; 770 771 /* Allocate Memory for Hash Table */ 772 PetscCall(PetscCalloc2(ht_size,&baij->hd,ht_size,&baij->ht)); 773 HD = baij->hd; 774 HT = baij->ht; 775 776 /* Loop Over A */ 777 for (i=0; i<a->mbs; i++) { 778 for (j=ai[i]; j<ai[i+1]; j++) { 779 row = i+rstart; 780 col = aj[j]+cstart; 781 782 key = row*Nbs + col + 1; 783 h1 = HASH(ht_size,key,tmp); 784 for (k=0; k<ht_size; k++) { 785 if (!HT[(h1+k)%ht_size]) { 786 HT[(h1+k)%ht_size] = key; 787 HD[(h1+k)%ht_size] = a->a + j*bs2; 788 break; 789 #if defined(PETSC_USE_INFO) 790 } else { 791 ct++; 792 #endif 793 } 794 } 795 #if defined(PETSC_USE_INFO) 796 if (k> max) max = k; 797 #endif 798 } 799 } 800 /* Loop Over B */ 801 for (i=0; i<b->mbs; i++) { 802 for (j=bi[i]; j<bi[i+1]; j++) { 803 row = i+rstart; 804 col = garray[bj[j]]; 805 key = row*Nbs + col + 1; 806 h1 = HASH(ht_size,key,tmp); 807 for (k=0; k<ht_size; k++) { 808 if (!HT[(h1+k)%ht_size]) { 809 HT[(h1+k)%ht_size] = key; 810 HD[(h1+k)%ht_size] = b->a + j*bs2; 811 break; 812 #if defined(PETSC_USE_INFO) 813 } else { 814 ct++; 815 #endif 816 } 817 } 818 #if defined(PETSC_USE_INFO) 819 if (k> max) max = k; 820 #endif 821 } 822 } 823 824 /* Print Summary */ 825 #if defined(PETSC_USE_INFO) 826 for (i=0,j=0; i<ht_size; i++) { 827 if (HT[i]) j++; 828 } 829 PetscCall(PetscInfo(mat,"Average Search = %5.2g,max search = %" PetscInt_FMT "\n",(!j) ? 
(double)0.0:(double)(((PetscReal)(ct+j))/(double)j),max)); 830 #endif 831 PetscFunctionReturn(0); 832 } 833 834 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 835 { 836 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 837 PetscInt nstash,reallocs; 838 839 PetscFunctionBegin; 840 if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 841 842 PetscCall(MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range)); 843 PetscCall(MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs)); 844 PetscCall(MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs)); 845 PetscCall(PetscInfo(mat,"Stash has %" PetscInt_FMT " entries,uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs)); 846 PetscCall(MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs)); 847 PetscCall(PetscInfo(mat,"Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs)); 848 PetscFunctionReturn(0); 849 } 850 851 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 852 { 853 Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data; 854 Mat_SeqBAIJ *a =(Mat_SeqBAIJ*)baij->A->data; 855 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 856 PetscInt *row,*col; 857 PetscBool r1,r2,r3,other_disassembled; 858 MatScalar *val; 859 PetscMPIInt n; 860 861 PetscFunctionBegin; 862 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 863 if (!baij->donotstash && !mat->nooffprocentries) { 864 while (1) { 865 PetscCall(MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg)); 866 if (!flg) break; 867 868 for (i=0; i<n;) { 869 /* Now identify the consecutive vals belonging to the same row */ 870 for (j=i,rstart=row[j]; j<n; j++) { 871 if (row[j] != rstart) break; 872 } 873 if (j < n) ncols = j-i; 874 else ncols = n-i; 875 /* Now assemble all these values with a single function call */ 876 PetscCall(MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode)); 877 i = j; 878 } 879 } 880 PetscCall(MatStashScatterEnd_Private(&mat->stash)); 881 /* Now process the block-stash. Since the values are stashed column-oriented, 882 set the roworiented flag to column oriented, and after MatSetValues() 883 restore the original flags */ 884 r1 = baij->roworiented; 885 r2 = a->roworiented; 886 r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented; 887 888 baij->roworiented = PETSC_FALSE; 889 a->roworiented = PETSC_FALSE; 890 891 (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */ 892 while (1) { 893 PetscCall(MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg)); 894 if (!flg) break; 895 896 for (i=0; i<n;) { 897 /* Now identify the consecutive vals belonging to the same row */ 898 for (j=i,rstart=row[j]; j<n; j++) { 899 if (row[j] != rstart) break; 900 } 901 if (j < n) ncols = j-i; 902 else ncols = n-i; 903 PetscCall(MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode)); 904 i = j; 905 } 906 } 907 PetscCall(MatStashScatterEnd_Private(&mat->bstash)); 908 909 baij->roworiented = r1; 910 a->roworiented = r2; 911 912 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */ 913 } 914 915 PetscCall(MatAssemblyBegin(baij->A,mode)); 916 PetscCall(MatAssemblyEnd(baij->A,mode)); 917 918 /* determine if any processor has disassembled, if so we must 919 also disassemble ourselves, in order that we may reassemble. 
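     (The MPI_PROD reduction below over mat->was_assembled is PETSC_TRUE only
     if every rank is still assembled; if any rank has already disassembled,
     each rank that was assembled disassembles as well so that all ranks
     rebuild the off-diagonal structure consistently.)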
*/ 920 /* 921 if nonzero structure of submatrix B cannot change then we know that 922 no processor disassembled thus we can skip this stuff 923 */ 924 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 925 PetscCall(MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat))); 926 if (mat->was_assembled && !other_disassembled) { 927 PetscCall(MatDisAssemble_MPIBAIJ(mat)); 928 } 929 } 930 931 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 932 PetscCall(MatSetUpMultiply_MPIBAIJ(mat)); 933 } 934 PetscCall(MatAssemblyBegin(baij->B,mode)); 935 PetscCall(MatAssemblyEnd(baij->B,mode)); 936 937 #if defined(PETSC_USE_INFO) 938 if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 939 PetscCall(PetscInfo(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",(double)((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct)); 940 941 baij->ht_total_ct = 0; 942 baij->ht_insert_ct = 0; 943 } 944 #endif 945 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 946 PetscCall(MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact)); 947 948 mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 949 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 950 } 951 952 PetscCall(PetscFree2(baij->rowvalues,baij->rowindices)); 953 954 baij->rowvalues = NULL; 955 956 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 957 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 958 PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate; 959 PetscCall(MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat))); 960 } 961 PetscFunctionReturn(0); 962 } 963 964 extern PetscErrorCode MatView_SeqBAIJ(Mat,PetscViewer); 965 #include <petscdraw.h> 966 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 967 { 968 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 969 PetscMPIInt rank = baij->rank; 970 PetscInt bs = mat->rmap->bs; 971 PetscBool iascii,isdraw; 972 PetscViewer sviewer; 973 PetscViewerFormat format; 974 975 PetscFunctionBegin; 976 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii)); 977 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw)); 978 if (iascii) { 979 PetscCall(PetscViewerGetFormat(viewer,&format)); 980 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 981 MatInfo info; 982 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank)); 983 PetscCall(MatGetInfo(mat,MAT_LOCAL,&info)); 984 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 985 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", 986 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(double)info.memory)); 987 PetscCall(MatGetInfo(baij->A,MAT_LOCAL,&info)); 988 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used)); 989 PetscCall(MatGetInfo(baij->B,MAT_LOCAL,&info)); 990 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used)); 991 PetscCall(PetscViewerFlush(viewer)); 992 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 993 PetscCall(PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n")); 994 
PetscCall(VecScatterView(baij->Mvctx,viewer)); 995 PetscFunctionReturn(0); 996 } else if (format == PETSC_VIEWER_ASCII_INFO) { 997 PetscCall(PetscViewerASCIIPrintf(viewer," block size is %" PetscInt_FMT "\n",bs)); 998 PetscFunctionReturn(0); 999 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1000 PetscFunctionReturn(0); 1001 } 1002 } 1003 1004 if (isdraw) { 1005 PetscDraw draw; 1006 PetscBool isnull; 1007 PetscCall(PetscViewerDrawGetDraw(viewer,0,&draw)); 1008 PetscCall(PetscDrawIsNull(draw,&isnull)); 1009 if (isnull) PetscFunctionReturn(0); 1010 } 1011 1012 { 1013 /* assemble the entire matrix onto first processor. */ 1014 Mat A; 1015 Mat_SeqBAIJ *Aloc; 1016 PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 1017 MatScalar *a; 1018 const char *matname; 1019 1020 /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */ 1021 /* Perhaps this should be the type of mat? */ 1022 PetscCall(MatCreate(PetscObjectComm((PetscObject)mat),&A)); 1023 if (rank == 0) { 1024 PetscCall(MatSetSizes(A,M,N,M,N)); 1025 } else { 1026 PetscCall(MatSetSizes(A,0,0,M,N)); 1027 } 1028 PetscCall(MatSetType(A,MATMPIBAIJ)); 1029 PetscCall(MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL)); 1030 PetscCall(MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE)); 1031 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)A)); 1032 1033 /* copy over the A part */ 1034 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1035 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1036 PetscCall(PetscMalloc1(bs,&rvals)); 1037 1038 for (i=0; i<mbs; i++) { 1039 rvals[0] = bs*(baij->rstartbs + i); 1040 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1041 for (j=ai[i]; j<ai[i+1]; j++) { 1042 col = (baij->cstartbs+aj[j])*bs; 1043 for (k=0; k<bs; k++) { 1044 PetscCall(MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES)); 1045 col++; a += bs; 1046 } 1047 } 1048 } 1049 /* copy over the B part */ 1050 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1051 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1052 for (i=0; i<mbs; i++) { 1053 rvals[0] = bs*(baij->rstartbs + i); 1054 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1055 for (j=ai[i]; j<ai[i+1]; j++) { 1056 col = baij->garray[aj[j]]*bs; 1057 for (k=0; k<bs; k++) { 1058 PetscCall(MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES)); 1059 col++; a += bs; 1060 } 1061 } 1062 } 1063 PetscCall(PetscFree(rvals)); 1064 PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY)); 1065 PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY)); 1066 /* 1067 Everyone has to call to draw the matrix since the graphics waits are 1068 synchronized across all processors that share the PetscDraw object 1069 */ 1070 PetscCall(PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer)); 1071 PetscCall(PetscObjectGetName((PetscObject)mat,&matname)); 1072 if (rank == 0) { 1073 PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,matname)); 1074 PetscCall(MatView_SeqBAIJ(((Mat_MPIBAIJ*)(A->data))->A,sviewer)); 1075 } 1076 PetscCall(PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer)); 1077 PetscCall(PetscViewerFlush(viewer)); 1078 PetscCall(MatDestroy(&A)); 1079 } 1080 PetscFunctionReturn(0); 1081 } 1082 1083 /* Used for both MPIBAIJ and MPISBAIJ matrices */ 1084 PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer) 1085 { 1086 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)mat->data; 1087 Mat_SeqBAIJ *A = (Mat_SeqBAIJ*)aij->A->data; 1088 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)aij->B->data; 1089 const PetscInt *garray = aij->garray; 1090 PetscInt 
header[4],M,N,m,rs,cs,bs,nz,cnt,i,j,ja,jb,k,l; 1091 PetscInt *rowlens,*colidxs; 1092 PetscScalar *matvals; 1093 1094 PetscFunctionBegin; 1095 PetscCall(PetscViewerSetUp(viewer)); 1096 1097 M = mat->rmap->N; 1098 N = mat->cmap->N; 1099 m = mat->rmap->n; 1100 rs = mat->rmap->rstart; 1101 cs = mat->cmap->rstart; 1102 bs = mat->rmap->bs; 1103 nz = bs*bs*(A->nz + B->nz); 1104 1105 /* write matrix header */ 1106 header[0] = MAT_FILE_CLASSID; 1107 header[1] = M; header[2] = N; header[3] = nz; 1108 PetscCallMPI(MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat))); 1109 PetscCall(PetscViewerBinaryWrite(viewer,header,4,PETSC_INT)); 1110 1111 /* fill in and store row lengths */ 1112 PetscCall(PetscMalloc1(m,&rowlens)); 1113 for (cnt=0, i=0; i<A->mbs; i++) 1114 for (j=0; j<bs; j++) 1115 rowlens[cnt++] = bs*(A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]); 1116 PetscCall(PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT)); 1117 PetscCall(PetscFree(rowlens)); 1118 1119 /* fill in and store column indices */ 1120 PetscCall(PetscMalloc1(nz,&colidxs)); 1121 for (cnt=0, i=0; i<A->mbs; i++) { 1122 for (k=0; k<bs; k++) { 1123 for (jb=B->i[i]; jb<B->i[i+1]; jb++) { 1124 if (garray[B->j[jb]] > cs/bs) break; 1125 for (l=0; l<bs; l++) 1126 colidxs[cnt++] = bs*garray[B->j[jb]] + l; 1127 } 1128 for (ja=A->i[i]; ja<A->i[i+1]; ja++) 1129 for (l=0; l<bs; l++) 1130 colidxs[cnt++] = bs*A->j[ja] + l + cs; 1131 for (; jb<B->i[i+1]; jb++) 1132 for (l=0; l<bs; l++) 1133 colidxs[cnt++] = bs*garray[B->j[jb]] + l; 1134 } 1135 } 1136 PetscCheck(cnt == nz,PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz); 1137 PetscCall(PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DECIDE,PETSC_DECIDE,PETSC_INT)); 1138 PetscCall(PetscFree(colidxs)); 1139 1140 /* fill in and store nonzero values */ 1141 PetscCall(PetscMalloc1(nz,&matvals)); 1142 for (cnt=0, i=0; i<A->mbs; i++) { 1143 for (k=0; k<bs; k++) { 1144 for (jb=B->i[i]; jb<B->i[i+1]; jb++) { 1145 if (garray[B->j[jb]] > cs/bs) break; 1146 for (l=0; l<bs; l++) 1147 matvals[cnt++] = B->a[bs*(bs*jb + l) + k]; 1148 } 1149 for (ja=A->i[i]; ja<A->i[i+1]; ja++) 1150 for (l=0; l<bs; l++) 1151 matvals[cnt++] = A->a[bs*(bs*ja + l) + k]; 1152 for (; jb<B->i[i+1]; jb++) 1153 for (l=0; l<bs; l++) 1154 matvals[cnt++] = B->a[bs*(bs*jb + l) + k]; 1155 } 1156 } 1157 PetscCall(PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DECIDE,PETSC_DECIDE,PETSC_SCALAR)); 1158 PetscCall(PetscFree(matvals)); 1159 1160 /* write block size option to the viewer's .info file */ 1161 PetscCall(MatView_Binary_BlockSizes(mat,viewer)); 1162 PetscFunctionReturn(0); 1163 } 1164 1165 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer) 1166 { 1167 PetscBool iascii,isdraw,issocket,isbinary; 1168 1169 PetscFunctionBegin; 1170 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii)); 1171 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw)); 1172 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket)); 1173 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary)); 1174 if (iascii || isdraw || issocket) { 1175 PetscCall(MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer)); 1176 } else if (isbinary) PetscCall(MatView_MPIBAIJ_Binary(mat,viewer)); 1177 PetscFunctionReturn(0); 1178 } 1179 1180 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat) 1181 { 1182 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1183 1184 PetscFunctionBegin; 1185 #if 
defined(PETSC_USE_LOG) 1186 PetscLogObjectState((PetscObject)mat,"Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT,mat->rmap->N,mat->cmap->N); 1187 #endif 1188 PetscCall(MatStashDestroy_Private(&mat->stash)); 1189 PetscCall(MatStashDestroy_Private(&mat->bstash)); 1190 PetscCall(MatDestroy(&baij->A)); 1191 PetscCall(MatDestroy(&baij->B)); 1192 #if defined(PETSC_USE_CTABLE) 1193 PetscCall(PetscTableDestroy(&baij->colmap)); 1194 #else 1195 PetscCall(PetscFree(baij->colmap)); 1196 #endif 1197 PetscCall(PetscFree(baij->garray)); 1198 PetscCall(VecDestroy(&baij->lvec)); 1199 PetscCall(VecScatterDestroy(&baij->Mvctx)); 1200 PetscCall(PetscFree2(baij->rowvalues,baij->rowindices)); 1201 PetscCall(PetscFree(baij->barray)); 1202 PetscCall(PetscFree2(baij->hd,baij->ht)); 1203 PetscCall(PetscFree(baij->rangebs)); 1204 PetscCall(PetscFree(mat->data)); 1205 1206 PetscCall(PetscObjectChangeTypeName((PetscObject)mat,NULL)); 1207 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL)); 1208 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL)); 1209 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C",NULL)); 1210 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C",NULL)); 1211 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL)); 1212 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C",NULL)); 1213 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C",NULL)); 1214 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpiadj_C",NULL)); 1215 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpiaij_C",NULL)); 1216 #if defined(PETSC_HAVE_HYPRE) 1217 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_hypre_C",NULL)); 1218 #endif 1219 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_is_C",NULL)); 1220 PetscFunctionReturn(0); 1221 } 1222 1223 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1224 { 1225 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1226 PetscInt nt; 1227 1228 PetscFunctionBegin; 1229 PetscCall(VecGetLocalSize(xx,&nt)); 1230 PetscCheck(nt == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1231 PetscCall(VecGetLocalSize(yy,&nt)); 1232 PetscCheck(nt == A->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 1233 PetscCall(VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1234 PetscCall((*a->A->ops->mult)(a->A,xx,yy)); 1235 PetscCall(VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1236 PetscCall((*a->B->ops->multadd)(a->B,a->lvec,yy,yy)); 1237 PetscFunctionReturn(0); 1238 } 1239 1240 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1241 { 1242 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1243 1244 PetscFunctionBegin; 1245 PetscCall(VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1246 PetscCall((*a->A->ops->multadd)(a->A,xx,yy,zz)); 1247 PetscCall(VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1248 PetscCall((*a->B->ops->multadd)(a->B,a->lvec,zz,zz)); 1249 PetscFunctionReturn(0); 1250 } 1251 1252 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy) 1253 { 1254 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1255 1256 PetscFunctionBegin; 1257 /* do nondiagonal part */ 1258 PetscCall((*a->B->ops->multtranspose)(a->B,xx,a->lvec)); 1259 /* do local part */ 1260 
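  /* yy <- A_loc^T x is computed locally below; the off-diagonal contribution
     B^T x computed above is held in a->lvec and is added into the owning
     ranks' entries of yy by the reverse scatter that follows */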
PetscCall((*a->A->ops->multtranspose)(a->A,xx,yy)); 1261 /* add partial results together */ 1262 PetscCall(VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE)); 1263 PetscCall(VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE)); 1264 PetscFunctionReturn(0); 1265 } 1266 1267 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1268 { 1269 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1270 1271 PetscFunctionBegin; 1272 /* do nondiagonal part */ 1273 PetscCall((*a->B->ops->multtranspose)(a->B,xx,a->lvec)); 1274 /* do local part */ 1275 PetscCall((*a->A->ops->multtransposeadd)(a->A,xx,yy,zz)); 1276 /* add partial results together */ 1277 PetscCall(VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE)); 1278 PetscCall(VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE)); 1279 PetscFunctionReturn(0); 1280 } 1281 1282 /* 1283 This only works correctly for square matrices where the subblock A->A is the 1284 diagonal block 1285 */ 1286 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1287 { 1288 PetscFunctionBegin; 1289 PetscCheck(A->rmap->N == A->cmap->N,PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1290 PetscCall(MatGetDiagonal(((Mat_MPIBAIJ*)A->data)->A,v)); 1291 PetscFunctionReturn(0); 1292 } 1293 1294 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa) 1295 { 1296 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1297 1298 PetscFunctionBegin; 1299 PetscCall(MatScale(a->A,aa)); 1300 PetscCall(MatScale(a->B,aa)); 1301 PetscFunctionReturn(0); 1302 } 1303 1304 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1305 { 1306 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1307 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1308 PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1309 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend; 1310 PetscInt *cmap,*idx_p,cstart = mat->cstartbs; 1311 1312 PetscFunctionBegin; 1313 PetscCheck(row >= brstart && row < brend,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows"); 1314 PetscCheck(!mat->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1315 mat->getrowactive = PETSC_TRUE; 1316 1317 if (!mat->rowvalues && (idx || v)) { 1318 /* 1319 allocate enough space to hold information from the longest row. 
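     (The scan below takes, over all block rows, the maximum of the number of
     nonzero blocks in A plus the number in B; rowvalues then holds max*bs2
     scalars and rowindices max*bs2 column indices.)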
1320 */ 1321 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data; 1322 PetscInt max = 1,mbs = mat->mbs,tmp; 1323 for (i=0; i<mbs; i++) { 1324 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1325 if (max < tmp) max = tmp; 1326 } 1327 PetscCall(PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices)); 1328 } 1329 lrow = row - brstart; 1330 1331 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1332 if (!v) {pvA = NULL; pvB = NULL;} 1333 if (!idx) {pcA = NULL; if (!v) pcB = NULL;} 1334 PetscCall((*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA)); 1335 PetscCall((*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB)); 1336 nztot = nzA + nzB; 1337 1338 cmap = mat->garray; 1339 if (v || idx) { 1340 if (nztot) { 1341 /* Sort by increasing column numbers, assuming A and B already sorted */ 1342 PetscInt imark = -1; 1343 if (v) { 1344 *v = v_p = mat->rowvalues; 1345 for (i=0; i<nzB; i++) { 1346 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1347 else break; 1348 } 1349 imark = i; 1350 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1351 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1352 } 1353 if (idx) { 1354 *idx = idx_p = mat->rowindices; 1355 if (imark > -1) { 1356 for (i=0; i<imark; i++) { 1357 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1358 } 1359 } else { 1360 for (i=0; i<nzB; i++) { 1361 if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1362 else break; 1363 } 1364 imark = i; 1365 } 1366 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1367 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1368 } 1369 } else { 1370 if (idx) *idx = NULL; 1371 if (v) *v = NULL; 1372 } 1373 } 1374 *nz = nztot; 1375 PetscCall((*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA)); 1376 PetscCall((*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB)); 1377 PetscFunctionReturn(0); 1378 } 1379 1380 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1381 { 1382 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1383 1384 PetscFunctionBegin; 1385 PetscCheck(baij->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1386 baij->getrowactive = PETSC_FALSE; 1387 PetscFunctionReturn(0); 1388 } 1389 1390 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A) 1391 { 1392 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1393 1394 PetscFunctionBegin; 1395 PetscCall(MatZeroEntries(l->A)); 1396 PetscCall(MatZeroEntries(l->B)); 1397 PetscFunctionReturn(0); 1398 } 1399 1400 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1401 { 1402 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data; 1403 Mat A = a->A,B = a->B; 1404 PetscLogDouble isend[5],irecv[5]; 1405 1406 PetscFunctionBegin; 1407 info->block_size = (PetscReal)matin->rmap->bs; 1408 1409 PetscCall(MatGetInfo(A,MAT_LOCAL,info)); 1410 1411 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1412 isend[3] = info->memory; isend[4] = info->mallocs; 1413 1414 PetscCall(MatGetInfo(B,MAT_LOCAL,info)); 1415 1416 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1417 isend[3] += info->memory; isend[4] += info->mallocs; 1418 1419 if (flag == MAT_LOCAL) { 1420 info->nz_used = isend[0]; 1421 info->nz_allocated = isend[1]; 1422 info->nz_unneeded = isend[2]; 1423 info->memory = isend[3]; 1424 info->mallocs = isend[4]; 1425 } else if (flag == MAT_GLOBAL_MAX) { 1426 
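    /* take the entry-wise maximum of the five local counters
       (nz_used, nz_allocated, nz_unneeded, memory, mallocs) over all ranks */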
PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin))); 1427 1428 info->nz_used = irecv[0]; 1429 info->nz_allocated = irecv[1]; 1430 info->nz_unneeded = irecv[2]; 1431 info->memory = irecv[3]; 1432 info->mallocs = irecv[4]; 1433 } else if (flag == MAT_GLOBAL_SUM) { 1434 PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin))); 1435 1436 info->nz_used = irecv[0]; 1437 info->nz_allocated = irecv[1]; 1438 info->nz_unneeded = irecv[2]; 1439 info->memory = irecv[3]; 1440 info->mallocs = irecv[4]; 1441 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1442 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1443 info->fill_ratio_needed = 0; 1444 info->factor_mallocs = 0; 1445 PetscFunctionReturn(0); 1446 } 1447 1448 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg) 1449 { 1450 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1451 1452 PetscFunctionBegin; 1453 switch (op) { 1454 case MAT_NEW_NONZERO_LOCATIONS: 1455 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1456 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1457 case MAT_KEEP_NONZERO_PATTERN: 1458 case MAT_NEW_NONZERO_LOCATION_ERR: 1459 MatCheckPreallocated(A,1); 1460 PetscCall(MatSetOption(a->A,op,flg)); 1461 PetscCall(MatSetOption(a->B,op,flg)); 1462 break; 1463 case MAT_ROW_ORIENTED: 1464 MatCheckPreallocated(A,1); 1465 a->roworiented = flg; 1466 1467 PetscCall(MatSetOption(a->A,op,flg)); 1468 PetscCall(MatSetOption(a->B,op,flg)); 1469 break; 1470 case MAT_FORCE_DIAGONAL_ENTRIES: 1471 case MAT_SORTED_FULL: 1472 PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op])); 1473 break; 1474 case MAT_IGNORE_OFF_PROC_ENTRIES: 1475 a->donotstash = flg; 1476 break; 1477 case MAT_USE_HASH_TABLE: 1478 a->ht_flag = flg; 1479 a->ht_fact = 1.39; 1480 break; 1481 case MAT_SYMMETRIC: 1482 case MAT_STRUCTURALLY_SYMMETRIC: 1483 case MAT_HERMITIAN: 1484 case MAT_SUBMAT_SINGLEIS: 1485 case MAT_SYMMETRY_ETERNAL: 1486 MatCheckPreallocated(A,1); 1487 PetscCall(MatSetOption(a->A,op,flg)); 1488 break; 1489 default: 1490 SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"unknown option %d",op); 1491 } 1492 PetscFunctionReturn(0); 1493 } 1494 1495 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout) 1496 { 1497 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 1498 Mat_SeqBAIJ *Aloc; 1499 Mat B; 1500 PetscInt M =A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col; 1501 PetscInt bs=A->rmap->bs,mbs=baij->mbs; 1502 MatScalar *a; 1503 1504 PetscFunctionBegin; 1505 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 1506 PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B)); 1507 PetscCall(MatSetSizes(B,A->cmap->n,A->rmap->n,N,M)); 1508 PetscCall(MatSetType(B,((PetscObject)A)->type_name)); 1509 /* Do not know preallocation information, but must set block size */ 1510 PetscCall(MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,NULL,PETSC_DECIDE,NULL)); 1511 } else { 1512 B = *matout; 1513 } 1514 1515 /* copy over the A part */ 1516 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1517 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1518 PetscCall(PetscMalloc1(bs,&rvals)); 1519 1520 for (i=0; i<mbs; i++) { 1521 rvals[0] = bs*(baij->rstartbs + i); 1522 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1523 for (j=ai[i]; j<ai[i+1]; j++) { 1524 col = (baij->cstartbs+aj[j])*bs; 1525 for (k=0; k<bs; k++) { 1526 PetscCall(MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES)); 1527 1528 col++; a += 
bs; 1529 } 1530 } 1531 } 1532 /* copy over the B part */ 1533 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1534 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1535 for (i=0; i<mbs; i++) { 1536 rvals[0] = bs*(baij->rstartbs + i); 1537 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1538 for (j=ai[i]; j<ai[i+1]; j++) { 1539 col = baij->garray[aj[j]]*bs; 1540 for (k=0; k<bs; k++) { 1541 PetscCall(MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES)); 1542 col++; 1543 a += bs; 1544 } 1545 } 1546 } 1547 PetscCall(PetscFree(rvals)); 1548 PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY)); 1549 PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY)); 1550 1551 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B; 1552 else { 1553 PetscCall(MatHeaderMerge(A,&B)); 1554 } 1555 PetscFunctionReturn(0); 1556 } 1557 1558 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 1559 { 1560 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1561 Mat a = baij->A,b = baij->B; 1562 PetscInt s1,s2,s3; 1563 1564 PetscFunctionBegin; 1565 PetscCall(MatGetLocalSize(mat,&s2,&s3)); 1566 if (rr) { 1567 PetscCall(VecGetLocalSize(rr,&s1)); 1568 PetscCheck(s1==s3,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1569 /* Overlap communication with computation. */ 1570 PetscCall(VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1571 } 1572 if (ll) { 1573 PetscCall(VecGetLocalSize(ll,&s1)); 1574 PetscCheck(s1==s2,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1575 PetscCall((*b->ops->diagonalscale)(b,ll,NULL)); 1576 } 1577 /* scale the diagonal block */ 1578 PetscCall((*a->ops->diagonalscale)(a,ll,rr)); 1579 1580 if (rr) { 1581 /* Do a scatter end and then right scale the off-diagonal block */ 1582 PetscCall(VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1583 PetscCall((*b->ops->diagonalscale)(b,NULL,baij->lvec)); 1584 } 1585 PetscFunctionReturn(0); 1586 } 1587 1588 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 1589 { 1590 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 1591 PetscInt *lrows; 1592 PetscInt r, len; 1593 PetscBool cong; 1594 1595 PetscFunctionBegin; 1596 /* get locally owned rows */ 1597 PetscCall(MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows)); 1598 /* fix right hand side if needed */ 1599 if (x && b) { 1600 const PetscScalar *xx; 1601 PetscScalar *bb; 1602 1603 PetscCall(VecGetArrayRead(x,&xx)); 1604 PetscCall(VecGetArray(b,&bb)); 1605 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 1606 PetscCall(VecRestoreArrayRead(x,&xx)); 1607 PetscCall(VecRestoreArray(b,&bb)); 1608 } 1609 1610 /* actually zap the local rows */ 1611 /* 1612 Zero the required rows. If the "diagonal block" of the matrix 1613 is square and the user wishes to set the diagonal we use separate 1614 code so that MatSetValues() is not called for each diagonal allocating 1615 new memory, thus calling lots of mallocs and slowing things down. 
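     When the row and column layouts are congruent, MatZeroRows_SeqBAIJ below
     can place diag directly on the local diagonal; otherwise the diagonal
     entries are inserted afterwards with MatSetValues(), which requires that
     the matrix still allows new nonzero locations.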
1616 1617 */ 1618 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1619 PetscCall(MatZeroRows_SeqBAIJ(l->B,len,lrows,0.0,NULL,NULL)); 1620 PetscCall(MatHasCongruentLayouts(A,&cong)); 1621 if ((diag != 0.0) && cong) { 1622 PetscCall(MatZeroRows_SeqBAIJ(l->A,len,lrows,diag,NULL,NULL)); 1623 } else if (diag != 0.0) { 1624 PetscCall(MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,NULL,NULL)); 1625 PetscCheck(!((Mat_SeqBAIJ*)l->A->data)->nonew,PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1626 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1627 for (r = 0; r < len; ++r) { 1628 const PetscInt row = lrows[r] + A->rmap->rstart; 1629 PetscCall(MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES)); 1630 } 1631 PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY)); 1632 PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY)); 1633 } else { 1634 PetscCall(MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,NULL,NULL)); 1635 } 1636 PetscCall(PetscFree(lrows)); 1637 1638 /* only change matrix nonzero state if pattern was allowed to be changed */ 1639 if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) { 1640 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1641 PetscCall(MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A))); 1642 } 1643 PetscFunctionReturn(0); 1644 } 1645 1646 PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 1647 { 1648 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1649 PetscMPIInt n = A->rmap->n,p = 0; 1650 PetscInt i,j,k,r,len = 0,row,col,count; 1651 PetscInt *lrows,*owners = A->rmap->range; 1652 PetscSFNode *rrows; 1653 PetscSF sf; 1654 const PetscScalar *xx; 1655 PetscScalar *bb,*mask; 1656 Vec xmask,lmask; 1657 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)l->B->data; 1658 PetscInt bs = A->rmap->bs, bs2 = baij->bs2; 1659 PetscScalar *aa; 1660 1661 PetscFunctionBegin; 1662 /* Create SF where leaves are input rows and roots are owned rows */ 1663 PetscCall(PetscMalloc1(n, &lrows)); 1664 for (r = 0; r < n; ++r) lrows[r] = -1; 1665 PetscCall(PetscMalloc1(N, &rrows)); 1666 for (r = 0; r < N; ++r) { 1667 const PetscInt idx = rows[r]; 1668 PetscCheck(idx >= 0 && A->rmap->N > idx,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")",idx,A->rmap->N); 1669 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 1670 PetscCall(PetscLayoutFindOwner(A->rmap,idx,&p)); 1671 } 1672 rrows[r].rank = p; 1673 rrows[r].index = rows[r] - owners[p]; 1674 } 1675 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject) A), &sf)); 1676 PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER)); 1677 /* Collect flags for rows to be zeroed */ 1678 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR)); 1679 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR)); 1680 PetscCall(PetscSFDestroy(&sf)); 1681 /* Compress and put in row numbers */ 1682 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 1683 /* zero diagonal part of matrix */ 1684 PetscCall(MatZeroRowsColumns(l->A,len,lrows,diag,x,b)); 1685 /* handle off diagonal part of matrix */ 1686 PetscCall(MatCreateVecs(A,&xmask,NULL)); 1687 PetscCall(VecDuplicate(l->lvec,&lmask)); 1688 PetscCall(VecGetArray(xmask,&bb)); 1689 for (i=0; i<len; i++) bb[lrows[i]] = 1; 1690 
PetscCall(VecRestoreArray(xmask,&bb)); 1691 PetscCall(VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD)); 1692 PetscCall(VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD)); 1693 PetscCall(VecDestroy(&xmask)); 1694 if (x) { 1695 PetscCall(VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1696 PetscCall(VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1697 PetscCall(VecGetArrayRead(l->lvec,&xx)); 1698 PetscCall(VecGetArray(b,&bb)); 1699 } 1700 PetscCall(VecGetArray(lmask,&mask)); 1701 /* remove zeroed rows of off diagonal matrix */ 1702 for (i = 0; i < len; ++i) { 1703 row = lrows[i]; 1704 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 1705 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 1706 for (k = 0; k < count; ++k) { 1707 aa[0] = 0.0; 1708 aa += bs; 1709 } 1710 } 1711 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1712 for (i = 0; i < l->B->rmap->N; ++i) { 1713 row = i/bs; 1714 for (j = baij->i[row]; j < baij->i[row+1]; ++j) { 1715 for (k = 0; k < bs; ++k) { 1716 col = bs*baij->j[j] + k; 1717 if (PetscAbsScalar(mask[col])) { 1718 aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 1719 if (x) bb[i] -= aa[0]*xx[col]; 1720 aa[0] = 0.0; 1721 } 1722 } 1723 } 1724 } 1725 if (x) { 1726 PetscCall(VecRestoreArray(b,&bb)); 1727 PetscCall(VecRestoreArrayRead(l->lvec,&xx)); 1728 } 1729 PetscCall(VecRestoreArray(lmask,&mask)); 1730 PetscCall(VecDestroy(&lmask)); 1731 PetscCall(PetscFree(lrows)); 1732 1733 /* only change matrix nonzero state if pattern was allowed to be changed */ 1734 if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) { 1735 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1736 PetscCall(MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A))); 1737 } 1738 PetscFunctionReturn(0); 1739 } 1740 1741 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1742 { 1743 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1744 1745 PetscFunctionBegin; 1746 PetscCall(MatSetUnfactored(a->A)); 1747 PetscFunctionReturn(0); 1748 } 1749 1750 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat*); 1751 1752 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool *flag) 1753 { 1754 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 1755 Mat a,b,c,d; 1756 PetscBool flg; 1757 1758 PetscFunctionBegin; 1759 a = matA->A; b = matA->B; 1760 c = matB->A; d = matB->B; 1761 1762 PetscCall(MatEqual(a,c,&flg)); 1763 if (flg) { 1764 PetscCall(MatEqual(b,d,&flg)); 1765 } 1766 PetscCall(MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A))); 1767 PetscFunctionReturn(0); 1768 } 1769 1770 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str) 1771 { 1772 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1773 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 1774 1775 PetscFunctionBegin; 1776 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
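     In that case, or when the nonzero patterns differ, we fall back to the general MatCopy_Basic(); otherwise the
     diagonal and off-diagonal blocks are copied directly.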
*/ 1777 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1778 PetscCall(MatCopy_Basic(A,B,str)); 1779 } else { 1780 PetscCall(MatCopy(a->A,b->A,str)); 1781 PetscCall(MatCopy(a->B,b->B,str)); 1782 } 1783 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 1784 PetscFunctionReturn(0); 1785 } 1786 1787 PetscErrorCode MatSetUp_MPIBAIJ(Mat A) 1788 { 1789 PetscFunctionBegin; 1790 PetscCall(MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL)); 1791 PetscFunctionReturn(0); 1792 } 1793 1794 PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 1795 { 1796 PetscInt bs = Y->rmap->bs,m = Y->rmap->N/bs; 1797 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data; 1798 Mat_SeqBAIJ *y = (Mat_SeqBAIJ*)Y->data; 1799 1800 PetscFunctionBegin; 1801 PetscCall(MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz)); 1802 PetscFunctionReturn(0); 1803 } 1804 1805 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1806 { 1807 Mat_MPIBAIJ *xx=(Mat_MPIBAIJ*)X->data,*yy=(Mat_MPIBAIJ*)Y->data; 1808 PetscBLASInt bnz,one=1; 1809 Mat_SeqBAIJ *x,*y; 1810 PetscInt bs2 = Y->rmap->bs*Y->rmap->bs; 1811 1812 PetscFunctionBegin; 1813 if (str == SAME_NONZERO_PATTERN) { 1814 PetscScalar alpha = a; 1815 x = (Mat_SeqBAIJ*)xx->A->data; 1816 y = (Mat_SeqBAIJ*)yy->A->data; 1817 PetscCall(PetscBLASIntCast(x->nz*bs2,&bnz)); 1818 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 1819 x = (Mat_SeqBAIJ*)xx->B->data; 1820 y = (Mat_SeqBAIJ*)yy->B->data; 1821 PetscCall(PetscBLASIntCast(x->nz*bs2,&bnz)); 1822 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 1823 PetscCall(PetscObjectStateIncrease((PetscObject)Y)); 1824 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1825 PetscCall(MatAXPY_Basic(Y,a,X,str)); 1826 } else { 1827 Mat B; 1828 PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs; 1829 PetscCall(PetscMalloc1(yy->A->rmap->N,&nnz_d)); 1830 PetscCall(PetscMalloc1(yy->B->rmap->N,&nnz_o)); 1831 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y),&B)); 1832 PetscCall(PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name)); 1833 PetscCall(MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N)); 1834 PetscCall(MatSetBlockSizesFromMats(B,Y,Y)); 1835 PetscCall(MatSetType(B,MATMPIBAIJ)); 1836 PetscCall(MatAXPYGetPreallocation_SeqBAIJ(yy->A,xx->A,nnz_d)); 1837 PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o)); 1838 PetscCall(MatMPIBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o)); 1839 /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! 
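     The nnz_d/nnz_o counts computed above and the preallocation of B at least keep the basic implementation from
     repeatedly reallocating as it inserts values.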
*/ 1840 PetscCall(MatAXPY_BasicWithPreallocation(B,Y,a,X,str)); 1841 PetscCall(MatHeaderMerge(Y,&B)); 1842 PetscCall(PetscFree(nnz_d)); 1843 PetscCall(PetscFree(nnz_o)); 1844 } 1845 PetscFunctionReturn(0); 1846 } 1847 1848 PetscErrorCode MatConjugate_MPIBAIJ(Mat mat) 1849 { 1850 PetscFunctionBegin; 1851 if (PetscDefined(USE_COMPLEX)) { 1852 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)mat->data; 1853 1854 PetscCall(MatConjugate_SeqBAIJ(a->A)); 1855 PetscCall(MatConjugate_SeqBAIJ(a->B)); 1856 } 1857 PetscFunctionReturn(0); 1858 } 1859 1860 PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 1861 { 1862 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1863 1864 PetscFunctionBegin; 1865 PetscCall(MatRealPart(a->A)); 1866 PetscCall(MatRealPart(a->B)); 1867 PetscFunctionReturn(0); 1868 } 1869 1870 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 1871 { 1872 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1873 1874 PetscFunctionBegin; 1875 PetscCall(MatImaginaryPart(a->A)); 1876 PetscCall(MatImaginaryPart(a->B)); 1877 PetscFunctionReturn(0); 1878 } 1879 1880 PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 1881 { 1882 IS iscol_local; 1883 PetscInt csize; 1884 1885 PetscFunctionBegin; 1886 PetscCall(ISGetLocalSize(iscol,&csize)); 1887 if (call == MAT_REUSE_MATRIX) { 1888 PetscCall(PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local)); 1889 PetscCheck(iscol_local,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 1890 } else { 1891 PetscCall(ISAllGather(iscol,&iscol_local)); 1892 } 1893 PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat)); 1894 if (call == MAT_INITIAL_MATRIX) { 1895 PetscCall(PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local)); 1896 PetscCall(ISDestroy(&iscol_local)); 1897 } 1898 PetscFunctionReturn(0); 1899 } 1900 1901 /* 1902 Not great since it makes two copies of the submatrix: first a SeqBAIJ 1903 on each process, and then the end result by concatenating the local matrices. 1904 Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ(). 1905 This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency). 1906 */ 1907 PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 1908 { 1909 PetscMPIInt rank,size; 1910 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs; 1911 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 1912 Mat M,Mreuse; 1913 MatScalar *vwork,*aa; 1914 MPI_Comm comm; 1915 IS isrow_new, iscol_new; 1916 Mat_SeqBAIJ *aij; 1917 1918 PetscFunctionBegin; 1919 PetscCall(PetscObjectGetComm((PetscObject)mat,&comm)); 1920 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 1921 PetscCallMPI(MPI_Comm_size(comm,&size)); 1922 /* The compression and expansion should be avoided.
It does not point 1923 out errors, may change the indices, and hence is bug-prone */ 1924 PetscCall(ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new)); 1925 PetscCall(ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new)); 1926 1927 if (call == MAT_REUSE_MATRIX) { 1928 PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse)); 1929 PetscCheck(Mreuse,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 1930 PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&Mreuse)); 1931 } else { 1932 PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&Mreuse)); 1933 } 1934 PetscCall(ISDestroy(&isrow_new)); 1935 PetscCall(ISDestroy(&iscol_new)); 1936 /* 1937 m - number of local rows 1938 n - number of columns (same on all processors) 1939 rstart - first row in new global matrix generated 1940 */ 1941 PetscCall(MatGetBlockSize(mat,&bs)); 1942 PetscCall(MatGetSize(Mreuse,&m,&n)); 1943 m = m/bs; 1944 n = n/bs; 1945 1946 if (call == MAT_INITIAL_MATRIX) { 1947 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 1948 ii = aij->i; 1949 jj = aij->j; 1950 1951 /* 1952 Determine the number of non-zeros in the diagonal and off-diagonal 1953 portions of the matrix in order to do correct preallocation 1954 */ 1955 1956 /* first get start and end of "diagonal" columns */ 1957 if (csize == PETSC_DECIDE) { 1958 PetscCall(ISGetSize(isrow,&mglobal)); 1959 if (mglobal == n*bs) { /* square matrix */ 1960 nlocal = m; 1961 } else { 1962 nlocal = n/size + ((n % size) > rank); 1963 } 1964 } else { 1965 nlocal = csize/bs; 1966 } 1967 PetscCallMPI(MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm)); 1968 rstart = rend - nlocal; 1969 PetscCheck(rank != size - 1 || rend == n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT,rend,n); 1970 1971 /* next, compute all the lengths */ 1972 PetscCall(PetscMalloc2(m+1,&dlens,m+1,&olens)); 1973 for (i=0; i<m; i++) { 1974 jend = ii[i+1] - ii[i]; 1975 olen = 0; 1976 dlen = 0; 1977 for (j=0; j<jend; j++) { 1978 if (*jj < rstart || *jj >= rend) olen++; 1979 else dlen++; 1980 jj++; 1981 } 1982 olens[i] = olen; 1983 dlens[i] = dlen; 1984 } 1985 PetscCall(MatCreate(comm,&M)); 1986 PetscCall(MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n)); 1987 PetscCall(MatSetType(M,((PetscObject)mat)->type_name)); 1988 PetscCall(MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens)); 1989 PetscCall(MatMPISBAIJSetPreallocation(M,bs,0,dlens,0,olens)); 1990 PetscCall(PetscFree2(dlens,olens)); 1991 } else { 1992 PetscInt ml,nl; 1993 1994 M = *newmat; 1995 PetscCall(MatGetLocalSize(M,&ml,&nl)); 1996 PetscCheck(ml == m,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 1997 PetscCall(MatZeroEntries(M)); 1998 /* 1999 The next two lines are needed so we may call MatSetValuesBlocked_MPIBAIJ() below directly, 2000 rather than the slower generic MatSetValuesBlocked().
2001 */ 2002 M->was_assembled = PETSC_TRUE; 2003 M->assembled = PETSC_FALSE; 2004 } 2005 PetscCall(MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE)); 2006 PetscCall(MatGetOwnershipRange(M,&rstart,&rend)); 2007 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 2008 ii = aij->i; 2009 jj = aij->j; 2010 aa = aij->a; 2011 for (i=0; i<m; i++) { 2012 row = rstart/bs + i; 2013 nz = ii[i+1] - ii[i]; 2014 cwork = jj; jj += nz; 2015 vwork = aa; aa += nz*bs*bs; 2016 PetscCall(MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES)); 2017 } 2018 2019 PetscCall(MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY)); 2020 PetscCall(MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY)); 2021 *newmat = M; 2022 2023 /* save submatrix used in processor for next request */ 2024 if (call == MAT_INITIAL_MATRIX) { 2025 PetscCall(PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse)); 2026 PetscCall(PetscObjectDereference((PetscObject)Mreuse)); 2027 } 2028 PetscFunctionReturn(0); 2029 } 2030 2031 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B) 2032 { 2033 MPI_Comm comm,pcomm; 2034 PetscInt clocal_size,nrows; 2035 const PetscInt *rows; 2036 PetscMPIInt size; 2037 IS crowp,lcolp; 2038 2039 PetscFunctionBegin; 2040 PetscCall(PetscObjectGetComm((PetscObject)A,&comm)); 2041 /* make a collective version of 'rowp' */ 2042 PetscCall(PetscObjectGetComm((PetscObject)rowp,&pcomm)); 2043 if (pcomm==comm) { 2044 crowp = rowp; 2045 } else { 2046 PetscCall(ISGetSize(rowp,&nrows)); 2047 PetscCall(ISGetIndices(rowp,&rows)); 2048 PetscCall(ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp)); 2049 PetscCall(ISRestoreIndices(rowp,&rows)); 2050 } 2051 PetscCall(ISSetPermutation(crowp)); 2052 /* make a local version of 'colp' */ 2053 PetscCall(PetscObjectGetComm((PetscObject)colp,&pcomm)); 2054 PetscCallMPI(MPI_Comm_size(pcomm,&size)); 2055 if (size==1) { 2056 lcolp = colp; 2057 } else { 2058 PetscCall(ISAllGather(colp,&lcolp)); 2059 } 2060 PetscCall(ISSetPermutation(lcolp)); 2061 /* now we just get the submatrix */ 2062 PetscCall(MatGetLocalSize(A,NULL,&clocal_size)); 2063 PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(A,crowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B)); 2064 /* clean up */ 2065 if (pcomm!=comm) { 2066 PetscCall(ISDestroy(&crowp)); 2067 } 2068 if (size>1) { 2069 PetscCall(ISDestroy(&lcolp)); 2070 } 2071 PetscFunctionReturn(0); 2072 } 2073 2074 PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 2075 { 2076 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data; 2077 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 2078 2079 PetscFunctionBegin; 2080 if (nghosts) *nghosts = B->nbs; 2081 if (ghosts) *ghosts = baij->garray; 2082 PetscFunctionReturn(0); 2083 } 2084 2085 PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat) 2086 { 2087 Mat B; 2088 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2089 Mat_SeqBAIJ *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data; 2090 Mat_SeqAIJ *b; 2091 PetscMPIInt size,rank,*recvcounts = NULL,*displs = NULL; 2092 PetscInt sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs; 2093 PetscInt m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf; 2094 2095 PetscFunctionBegin; 2096 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size)); 2097 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank)); 2098 2099 /* ---------------------------------------------------------------- 2100 Tell every processor the number of nonzeros per row 2101 */ 2102 PetscCall(PetscMalloc1(A->rmap->N/bs,&lens)); 2103 for (i=A->rmap->rstart/bs; 
i<A->rmap->rend/bs; i++) { 2104 lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs]; 2105 } 2106 PetscCall(PetscMalloc1(2*size,&recvcounts)); 2107 displs = recvcounts + size; 2108 for (i=0; i<size; i++) { 2109 recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs; 2110 displs[i] = A->rmap->range[i]/bs; 2111 } 2112 PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A))); 2113 /* --------------------------------------------------------------- 2114 Create the sequential matrix of the same type as the local block diagonal 2115 */ 2116 PetscCall(MatCreate(PETSC_COMM_SELF,&B)); 2117 PetscCall(MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE)); 2118 PetscCall(MatSetType(B,MATSEQAIJ)); 2119 PetscCall(MatSeqAIJSetPreallocation(B,0,lens)); 2120 b = (Mat_SeqAIJ*)B->data; 2121 2122 /*-------------------------------------------------------------------- 2123 Copy my part of matrix column indices over 2124 */ 2125 sendcount = ad->nz + bd->nz; 2126 jsendbuf = b->j + b->i[rstarts[rank]/bs]; 2127 a_jsendbuf = ad->j; 2128 b_jsendbuf = bd->j; 2129 n = A->rmap->rend/bs - A->rmap->rstart/bs; 2130 cnt = 0; 2131 for (i=0; i<n; i++) { 2132 2133 /* put in lower diagonal portion */ 2134 m = bd->i[i+1] - bd->i[i]; 2135 while (m > 0) { 2136 /* is it above diagonal (in bd (compressed) numbering) */ 2137 if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break; 2138 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2139 m--; 2140 } 2141 2142 /* put in diagonal portion */ 2143 for (j=ad->i[i]; j<ad->i[i+1]; j++) { 2144 jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++; 2145 } 2146 2147 /* put in upper diagonal portion */ 2148 while (m-- > 0) { 2149 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2150 } 2151 } 2152 PetscCheck(cnt == sendcount,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %" PetscInt_FMT " actual nz %" PetscInt_FMT,sendcount,cnt); 2153 2154 /*-------------------------------------------------------------------- 2155 Gather all column indices to all processors 2156 */ 2157 for (i=0; i<size; i++) { 2158 recvcounts[i] = 0; 2159 for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) { 2160 recvcounts[i] += lens[j]; 2161 } 2162 } 2163 displs[0] = 0; 2164 for (i=1; i<size; i++) { 2165 displs[i] = displs[i-1] + recvcounts[i-1]; 2166 } 2167 PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A))); 2168 /*-------------------------------------------------------------------- 2169 Assemble the matrix into useable form (note numerical values not yet set) 2170 */ 2171 /* set the b->ilen (length of each row) values */ 2172 PetscCall(PetscArraycpy(b->ilen,lens,A->rmap->N/bs)); 2173 /* set the b->i indices */ 2174 b->i[0] = 0; 2175 for (i=1; i<=A->rmap->N/bs; i++) { 2176 b->i[i] = b->i[i-1] + lens[i-1]; 2177 } 2178 PetscCall(PetscFree(lens)); 2179 PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY)); 2180 PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY)); 2181 PetscCall(PetscFree(recvcounts)); 2182 2183 if (A->symmetric) PetscCall(MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE)); 2184 else if (A->hermitian) PetscCall(MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE)); 2185 else if (A->structurally_symmetric) PetscCall(MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE)); 2186 *newmat = B; 2187 PetscFunctionReturn(0); 2188 } 2189 2190 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec 
bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2191 { 2192 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 2193 Vec bb1 = NULL; 2194 2195 PetscFunctionBegin; 2196 if (flag == SOR_APPLY_UPPER) { 2197 PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx)); 2198 PetscFunctionReturn(0); 2199 } 2200 2201 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) { 2202 PetscCall(VecDuplicate(bb,&bb1)); 2203 } 2204 2205 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2206 if (flag & SOR_ZERO_INITIAL_GUESS) { 2207 PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx)); 2208 its--; 2209 } 2210 2211 while (its--) { 2212 PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 2213 PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 2214 2215 /* update rhs: bb1 = bb - B*x */ 2216 PetscCall(VecScale(mat->lvec,-1.0)); 2217 PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1)); 2218 2219 /* local sweep */ 2220 PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx)); 2221 } 2222 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 2223 if (flag & SOR_ZERO_INITIAL_GUESS) { 2224 PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx)); 2225 its--; 2226 } 2227 while (its--) { 2228 PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 2229 PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 2230 2231 /* update rhs: bb1 = bb - B*x */ 2232 PetscCall(VecScale(mat->lvec,-1.0)); 2233 PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1)); 2234 2235 /* local sweep */ 2236 PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx)); 2237 } 2238 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 2239 if (flag & SOR_ZERO_INITIAL_GUESS) { 2240 PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx)); 2241 its--; 2242 } 2243 while (its--) { 2244 PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 2245 PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 2246 2247 /* update rhs: bb1 = bb - B*x */ 2248 PetscCall(VecScale(mat->lvec,-1.0)); 2249 PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1)); 2250 2251 /* local sweep */ 2252 PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx)); 2253 } 2254 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel version of SOR requested not supported"); 2255 2256 PetscCall(VecDestroy(&bb1)); 2257 PetscFunctionReturn(0); 2258 } 2259 2260 PetscErrorCode MatGetColumnReductions_MPIBAIJ(Mat A,PetscInt type,PetscReal *reductions) 2261 { 2262 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)A->data; 2263 PetscInt m,N,i,*garray = aij->garray; 2264 PetscInt ib,jb,bs = A->rmap->bs; 2265 Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ*) aij->A->data; 2266 MatScalar *a_val = a_aij->a; 2267 Mat_SeqBAIJ *b_aij = (Mat_SeqBAIJ*) aij->B->data; 2268 MatScalar *b_val = b_aij->a; 2269 PetscReal *work; 2270 2271 PetscFunctionBegin; 2272 PetscCall(MatGetSize(A,&m,&N)); 2273 PetscCall(PetscCalloc1(N,&work)); 2274 if (type == NORM_2) { 2275 for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2276 for (jb=0; jb<bs; jb++) { 2277 for (ib=0; ib<bs; ib++) { 2278 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val); 2279 a_val++; 2280 } 2281 } 2282 } 2283 for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2284 for (jb=0; 
jb<bs; jb++) { 2285 for (ib=0; ib<bs; ib++) { 2286 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val); 2287 b_val++; 2288 } 2289 } 2290 } 2291 } else if (type == NORM_1) { 2292 for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2293 for (jb=0; jb<bs; jb++) { 2294 for (ib=0; ib<bs; ib++) { 2295 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val); 2296 a_val++; 2297 } 2298 } 2299 } 2300 for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2301 for (jb=0; jb<bs; jb++) { 2302 for (ib=0; ib<bs; ib++) { 2303 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val); 2304 b_val++; 2305 } 2306 } 2307 } 2308 } else if (type == NORM_INFINITY) { 2309 for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2310 for (jb=0; jb<bs; jb++) { 2311 for (ib=0; ib<bs; ib++) { 2312 int col = A->cmap->rstart + a_aij->j[i] * bs + jb; 2313 work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]); 2314 a_val++; 2315 } 2316 } 2317 } 2318 for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2319 for (jb=0; jb<bs; jb++) { 2320 for (ib=0; ib<bs; ib++) { 2321 int col = garray[b_aij->j[i]] * bs + jb; 2322 work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]); 2323 b_val++; 2324 } 2325 } 2326 } 2327 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 2328 for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2329 for (jb=0; jb<bs; jb++) { 2330 for (ib=0; ib<bs; ib++) { 2331 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val); 2332 a_val++; 2333 } 2334 } 2335 } 2336 for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2337 for (jb=0; jb<bs; jb++) { 2338 for (ib=0; ib<bs; ib++) { 2339 work[garray[b_aij->j[i]] * bs + jb] += PetscRealPart(*b_val); 2340 b_val++; 2341 } 2342 } 2343 } 2344 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 2345 for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2346 for (jb=0; jb<bs; jb++) { 2347 for (ib=0; ib<bs; ib++) { 2348 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val); 2349 a_val++; 2350 } 2351 } 2352 } 2353 for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2354 for (jb=0; jb<bs; jb++) { 2355 for (ib=0; ib<bs; ib++) { 2356 work[garray[b_aij->j[i]] * bs + jb] += PetscImaginaryPart(*b_val); 2357 b_val++; 2358 } 2359 } 2360 } 2361 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown reduction type"); 2362 if (type == NORM_INFINITY) { 2363 PetscCall(MPIU_Allreduce(work,reductions,N,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A))); 2364 } else { 2365 PetscCall(MPIU_Allreduce(work,reductions,N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A))); 2366 } 2367 PetscCall(PetscFree(work)); 2368 if (type == NORM_2) { 2369 for (i=0; i<N; i++) reductions[i] = PetscSqrtReal(reductions[i]); 2370 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 2371 for (i=0; i<N; i++) reductions[i] /= m; 2372 } 2373 PetscFunctionReturn(0); 2374 } 2375 2376 PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values) 2377 { 2378 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 2379 2380 PetscFunctionBegin; 2381 PetscCall(MatInvertBlockDiagonal(a->A,values)); 2382 A->factorerrortype = a->A->factorerrortype; 2383 A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value; 2384 A->factorerror_zeropivot_row = a->A->factorerror_zeropivot_row; 2385 PetscFunctionReturn(0); 2386 } 2387 2388 PetscErrorCode MatShift_MPIBAIJ(Mat Y,PetscScalar a) 
2389 { 2390 Mat_MPIBAIJ *maij = (Mat_MPIBAIJ*)Y->data; 2391 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)maij->A->data; 2392 2393 PetscFunctionBegin; 2394 if (!Y->preallocated) { 2395 PetscCall(MatMPIBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL)); 2396 } else if (!aij->nz) { 2397 PetscInt nonew = aij->nonew; 2398 PetscCall(MatSeqBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL)); 2399 aij->nonew = nonew; 2400 } 2401 PetscCall(MatShift_Basic(Y,a)); 2402 PetscFunctionReturn(0); 2403 } 2404 2405 PetscErrorCode MatMissingDiagonal_MPIBAIJ(Mat A,PetscBool *missing,PetscInt *d) 2406 { 2407 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2408 2409 PetscFunctionBegin; 2410 PetscCheck(A->rmap->n == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2411 PetscCall(MatMissingDiagonal(a->A,missing,d)); 2412 if (d) { 2413 PetscInt rstart; 2414 PetscCall(MatGetOwnershipRange(A,&rstart,NULL)); 2415 *d += rstart/A->rmap->bs; 2416 2417 } 2418 PetscFunctionReturn(0); 2419 } 2420 2421 PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a) 2422 { 2423 PetscFunctionBegin; 2424 *a = ((Mat_MPIBAIJ*)A->data)->A; 2425 PetscFunctionReturn(0); 2426 } 2427 2428 /* -------------------------------------------------------------------*/ 2429 static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ, 2430 MatGetRow_MPIBAIJ, 2431 MatRestoreRow_MPIBAIJ, 2432 MatMult_MPIBAIJ, 2433 /* 4*/ MatMultAdd_MPIBAIJ, 2434 MatMultTranspose_MPIBAIJ, 2435 MatMultTransposeAdd_MPIBAIJ, 2436 NULL, 2437 NULL, 2438 NULL, 2439 /*10*/ NULL, 2440 NULL, 2441 NULL, 2442 MatSOR_MPIBAIJ, 2443 MatTranspose_MPIBAIJ, 2444 /*15*/ MatGetInfo_MPIBAIJ, 2445 MatEqual_MPIBAIJ, 2446 MatGetDiagonal_MPIBAIJ, 2447 MatDiagonalScale_MPIBAIJ, 2448 MatNorm_MPIBAIJ, 2449 /*20*/ MatAssemblyBegin_MPIBAIJ, 2450 MatAssemblyEnd_MPIBAIJ, 2451 MatSetOption_MPIBAIJ, 2452 MatZeroEntries_MPIBAIJ, 2453 /*24*/ MatZeroRows_MPIBAIJ, 2454 NULL, 2455 NULL, 2456 NULL, 2457 NULL, 2458 /*29*/ MatSetUp_MPIBAIJ, 2459 NULL, 2460 NULL, 2461 MatGetDiagonalBlock_MPIBAIJ, 2462 NULL, 2463 /*34*/ MatDuplicate_MPIBAIJ, 2464 NULL, 2465 NULL, 2466 NULL, 2467 NULL, 2468 /*39*/ MatAXPY_MPIBAIJ, 2469 MatCreateSubMatrices_MPIBAIJ, 2470 MatIncreaseOverlap_MPIBAIJ, 2471 MatGetValues_MPIBAIJ, 2472 MatCopy_MPIBAIJ, 2473 /*44*/ NULL, 2474 MatScale_MPIBAIJ, 2475 MatShift_MPIBAIJ, 2476 NULL, 2477 MatZeroRowsColumns_MPIBAIJ, 2478 /*49*/ NULL, 2479 NULL, 2480 NULL, 2481 NULL, 2482 NULL, 2483 /*54*/ MatFDColoringCreate_MPIXAIJ, 2484 NULL, 2485 MatSetUnfactored_MPIBAIJ, 2486 MatPermute_MPIBAIJ, 2487 MatSetValuesBlocked_MPIBAIJ, 2488 /*59*/ MatCreateSubMatrix_MPIBAIJ, 2489 MatDestroy_MPIBAIJ, 2490 MatView_MPIBAIJ, 2491 NULL, 2492 NULL, 2493 /*64*/ NULL, 2494 NULL, 2495 NULL, 2496 NULL, 2497 NULL, 2498 /*69*/ MatGetRowMaxAbs_MPIBAIJ, 2499 NULL, 2500 NULL, 2501 NULL, 2502 NULL, 2503 /*74*/ NULL, 2504 MatFDColoringApply_BAIJ, 2505 NULL, 2506 NULL, 2507 NULL, 2508 /*79*/ NULL, 2509 NULL, 2510 NULL, 2511 NULL, 2512 MatLoad_MPIBAIJ, 2513 /*84*/ NULL, 2514 NULL, 2515 NULL, 2516 NULL, 2517 NULL, 2518 /*89*/ NULL, 2519 NULL, 2520 NULL, 2521 NULL, 2522 NULL, 2523 /*94*/ NULL, 2524 NULL, 2525 NULL, 2526 NULL, 2527 NULL, 2528 /*99*/ NULL, 2529 NULL, 2530 NULL, 2531 MatConjugate_MPIBAIJ, 2532 NULL, 2533 /*104*/NULL, 2534 MatRealPart_MPIBAIJ, 2535 MatImaginaryPart_MPIBAIJ, 2536 NULL, 2537 NULL, 2538 /*109*/NULL, 2539 NULL, 2540 NULL, 2541 NULL, 2542 MatMissingDiagonal_MPIBAIJ, 2543 /*114*/MatGetSeqNonzeroStructure_MPIBAIJ, 2544 NULL, 2545 MatGetGhosts_MPIBAIJ, 2546 NULL, 2547 NULL, 2548 /*119*/NULL, 2549 NULL, 2550 
NULL, 2551 NULL, 2552 MatGetMultiProcBlock_MPIBAIJ, 2553 /*124*/NULL, 2554 MatGetColumnReductions_MPIBAIJ, 2555 MatInvertBlockDiagonal_MPIBAIJ, 2556 NULL, 2557 NULL, 2558 /*129*/ NULL, 2559 NULL, 2560 NULL, 2561 NULL, 2562 NULL, 2563 /*134*/ NULL, 2564 NULL, 2565 NULL, 2566 NULL, 2567 NULL, 2568 /*139*/ MatSetBlockSizes_Default, 2569 NULL, 2570 NULL, 2571 MatFDColoringSetUp_MPIXAIJ, 2572 NULL, 2573 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIBAIJ, 2574 NULL, 2575 NULL, 2576 NULL 2577 }; 2578 2579 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 2580 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*); 2581 2582 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2583 { 2584 PetscInt m,rstart,cstart,cend; 2585 PetscInt i,j,dlen,olen,nz,nz_max=0,*d_nnz=NULL,*o_nnz=NULL; 2586 const PetscInt *JJ =NULL; 2587 PetscScalar *values=NULL; 2588 PetscBool roworiented = ((Mat_MPIBAIJ*)B->data)->roworiented; 2589 PetscBool nooffprocentries; 2590 2591 PetscFunctionBegin; 2592 PetscCall(PetscLayoutSetBlockSize(B->rmap,bs)); 2593 PetscCall(PetscLayoutSetBlockSize(B->cmap,bs)); 2594 PetscCall(PetscLayoutSetUp(B->rmap)); 2595 PetscCall(PetscLayoutSetUp(B->cmap)); 2596 PetscCall(PetscLayoutGetBlockSize(B->rmap,&bs)); 2597 m = B->rmap->n/bs; 2598 rstart = B->rmap->rstart/bs; 2599 cstart = B->cmap->rstart/bs; 2600 cend = B->cmap->rend/bs; 2601 2602 PetscCheck(!ii[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %" PetscInt_FMT,ii[0]); 2603 PetscCall(PetscMalloc2(m,&d_nnz,m,&o_nnz)); 2604 for (i=0; i<m; i++) { 2605 nz = ii[i+1] - ii[i]; 2606 PetscCheck(nz >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT,i,nz); 2607 nz_max = PetscMax(nz_max,nz); 2608 dlen = 0; 2609 olen = 0; 2610 JJ = jj + ii[i]; 2611 for (j=0; j<nz; j++) { 2612 if (*JJ < cstart || *JJ >= cend) olen++; 2613 else dlen++; 2614 JJ++; 2615 } 2616 d_nnz[i] = dlen; 2617 o_nnz[i] = olen; 2618 } 2619 PetscCall(MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz)); 2620 PetscCall(PetscFree2(d_nnz,o_nnz)); 2621 2622 values = (PetscScalar*)V; 2623 if (!values) { 2624 PetscCall(PetscCalloc1(bs*bs*nz_max,&values)); 2625 } 2626 for (i=0; i<m; i++) { 2627 PetscInt row = i + rstart; 2628 PetscInt ncols = ii[i+1] - ii[i]; 2629 const PetscInt *icols = jj + ii[i]; 2630 if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */ 2631 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2632 PetscCall(MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES)); 2633 } else { /* block ordering does not match so we can only insert one block at a time. */ 2634 PetscInt j; 2635 for (j=0; j<ncols; j++) { 2636 const PetscScalar *svals = values + (V ? 
(bs*bs*(ii[i]+j)) : 0); 2637 PetscCall(MatSetValuesBlocked_MPIBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES)); 2638 } 2639 } 2640 } 2641 2642 if (!V) PetscCall(PetscFree(values)); 2643 nooffprocentries = B->nooffprocentries; 2644 B->nooffprocentries = PETSC_TRUE; 2645 PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY)); 2646 PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY)); 2647 B->nooffprocentries = nooffprocentries; 2648 2649 PetscCall(MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE)); 2650 PetscFunctionReturn(0); 2651 } 2652 2653 /*@C 2654 MatMPIBAIJSetPreallocationCSR - Creates a sparse parallel matrix in BAIJ format using the given nonzero structure and (optional) numerical values 2655 2656 Collective 2657 2658 Input Parameters: 2659 + B - the matrix 2660 . bs - the block size 2661 . i - the indices into j for the start of each local row (starts with zero) 2662 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2663 - v - optional values in the matrix 2664 2665 Level: advanced 2666 2667 Notes: 2668 The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED. For example, C programs 2669 may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is 2670 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 2671 MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 2672 block column and the second index is over columns within a block. 2673 2674 Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well 2675 2676 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MPIAIJ`, `MatCreateMPIBAIJWithArrays()`, `MPIBAIJ` 2677 @*/ 2678 PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2679 { 2680 PetscFunctionBegin; 2681 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2682 PetscValidType(B,1); 2683 PetscValidLogicalCollectiveInt(B,bs,2); 2684 PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v)); 2685 PetscFunctionReturn(0); 2686 } 2687 2688 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz) 2689 { 2690 Mat_MPIBAIJ *b; 2691 PetscInt i; 2692 PetscMPIInt size; 2693 2694 PetscFunctionBegin; 2695 PetscCall(MatSetBlockSize(B,PetscAbs(bs))); 2696 PetscCall(PetscLayoutSetUp(B->rmap)); 2697 PetscCall(PetscLayoutSetUp(B->cmap)); 2698 PetscCall(PetscLayoutGetBlockSize(B->rmap,&bs)); 2699 2700 if (d_nnz) { 2701 for (i=0; i<B->rmap->n/bs; i++) { 2702 PetscCheck(d_nnz[i] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT,i,d_nnz[i]); 2703 } 2704 } 2705 if (o_nnz) { 2706 for (i=0; i<B->rmap->n/bs; i++) { 2707 PetscCheck(o_nnz[i] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT,i,o_nnz[i]); 2708 } 2709 } 2710 2711 b = (Mat_MPIBAIJ*)B->data; 2712 b->bs2 = bs*bs; 2713 b->mbs = B->rmap->n/bs; 2714 b->nbs = B->cmap->n/bs; 2715 b->Mbs = B->rmap->N/bs; 2716 b->Nbs = B->cmap->N/bs; 2717 2718 for (i=0; i<=b->size; i++) { 2719 
b->rangebs[i] = B->rmap->range[i]/bs; 2720 } 2721 b->rstartbs = B->rmap->rstart/bs; 2722 b->rendbs = B->rmap->rend/bs; 2723 b->cstartbs = B->cmap->rstart/bs; 2724 b->cendbs = B->cmap->rend/bs; 2725 2726 #if defined(PETSC_USE_CTABLE) 2727 PetscCall(PetscTableDestroy(&b->colmap)); 2728 #else 2729 PetscCall(PetscFree(b->colmap)); 2730 #endif 2731 PetscCall(PetscFree(b->garray)); 2732 PetscCall(VecDestroy(&b->lvec)); 2733 PetscCall(VecScatterDestroy(&b->Mvctx)); 2734 2735 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2736 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&size)); 2737 PetscCall(MatDestroy(&b->B)); 2738 PetscCall(MatCreate(PETSC_COMM_SELF,&b->B)); 2739 PetscCall(MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0)); 2740 PetscCall(MatSetType(b->B,MATSEQBAIJ)); 2741 PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)b->B)); 2742 2743 if (!B->preallocated) { 2744 PetscCall(MatCreate(PETSC_COMM_SELF,&b->A)); 2745 PetscCall(MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n)); 2746 PetscCall(MatSetType(b->A,MATSEQBAIJ)); 2747 PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)b->A)); 2748 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash)); 2749 } 2750 2751 PetscCall(MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz)); 2752 PetscCall(MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz)); 2753 B->preallocated = PETSC_TRUE; 2754 B->was_assembled = PETSC_FALSE; 2755 B->assembled = PETSC_FALSE; 2756 PetscFunctionReturn(0); 2757 } 2758 2759 extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 2760 extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 2761 2762 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj) 2763 { 2764 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 2765 Mat_SeqBAIJ *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data; 2766 PetscInt M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs; 2767 const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray; 2768 2769 PetscFunctionBegin; 2770 PetscCall(PetscMalloc1(M+1,&ii)); 2771 ii[0] = 0; 2772 for (i=0; i<M; i++) { 2773 PetscCheck((id[i+1] - id[i]) >= 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT,i,id[i],id[i+1]); 2774 PetscCheck((io[i+1] - io[i]) >= 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT,i,io[i],io[i+1]); 2775 ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i]; 2776 /* remove one from count of matrix has diagonal */ 2777 for (j=id[i]; j<id[i+1]; j++) { 2778 if (jd[j] == i) {ii[i+1]--;break;} 2779 } 2780 } 2781 PetscCall(PetscMalloc1(ii[M],&jj)); 2782 cnt = 0; 2783 for (i=0; i<M; i++) { 2784 for (j=io[i]; j<io[i+1]; j++) { 2785 if (garray[jo[j]] > rstart) break; 2786 jj[cnt++] = garray[jo[j]]; 2787 } 2788 for (k=id[i]; k<id[i+1]; k++) { 2789 if (jd[k] != i) { 2790 jj[cnt++] = rstart + jd[k]; 2791 } 2792 } 2793 for (; j<io[i+1]; j++) { 2794 jj[cnt++] = garray[jo[j]]; 2795 } 2796 } 2797 PetscCall(MatCreateMPIAdj(PetscObjectComm((PetscObject)B),M,B->cmap->N/B->rmap->bs,ii,jj,NULL,adj)); 2798 PetscFunctionReturn(0); 2799 } 2800 2801 #include <../src/mat/impls/aij/mpi/mpiaij.h> 2802 2803 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*); 2804 2805 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType 
newtype,MatReuse reuse,Mat *newmat) 2806 { 2807 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2808 Mat_MPIAIJ *b; 2809 Mat B; 2810 2811 PetscFunctionBegin; 2812 PetscCheck(A->assembled,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Matrix must be assembled"); 2813 2814 if (reuse == MAT_REUSE_MATRIX) { 2815 B = *newmat; 2816 } else { 2817 PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B)); 2818 PetscCall(MatSetType(B,MATMPIAIJ)); 2819 PetscCall(MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N)); 2820 PetscCall(MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs)); 2821 PetscCall(MatSeqAIJSetPreallocation(B,0,NULL)); 2822 PetscCall(MatMPIAIJSetPreallocation(B,0,NULL,0,NULL)); 2823 } 2824 b = (Mat_MPIAIJ*) B->data; 2825 2826 if (reuse == MAT_REUSE_MATRIX) { 2827 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A)); 2828 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B)); 2829 } else { 2830 PetscCall(MatDestroy(&b->A)); 2831 PetscCall(MatDestroy(&b->B)); 2832 PetscCall(MatDisAssemble_MPIBAIJ(A)); 2833 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A)); 2834 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B)); 2835 PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY)); 2836 PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY)); 2837 } 2838 PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY)); 2839 PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY)); 2840 2841 if (reuse == MAT_INPLACE_MATRIX) { 2842 PetscCall(MatHeaderReplace(A,&B)); 2843 } else { 2844 *newmat = B; 2845 } 2846 PetscFunctionReturn(0); 2847 } 2848 2849 /*MC 2850 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 2851 2852 Options Database Keys: 2853 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 2854 . -mat_block_size <bs> - set the blocksize used to store the matrix 2855 . -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS) 2856 - -mat_use_hash_table <fact> - set hash table factor 2857 2858 Level: beginner 2859 2860 Notes: 2861 MatSetOptions(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. 
In this no 2862 space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored 2863 2864 .seealso: `MatCreateBAIJ` 2865 M*/ 2866 2867 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*); 2868 2869 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B) 2870 { 2871 Mat_MPIBAIJ *b; 2872 PetscBool flg = PETSC_FALSE; 2873 2874 PetscFunctionBegin; 2875 PetscCall(PetscNewLog(B,&b)); 2876 B->data = (void*)b; 2877 2878 PetscCall(PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps))); 2879 B->assembled = PETSC_FALSE; 2880 2881 B->insertmode = NOT_SET_VALUES; 2882 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank)); 2883 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size)); 2884 2885 /* build local table of row and column ownerships */ 2886 PetscCall(PetscMalloc1(b->size+1,&b->rangebs)); 2887 2888 /* build cache for off array entries formed */ 2889 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash)); 2890 2891 b->donotstash = PETSC_FALSE; 2892 b->colmap = NULL; 2893 b->garray = NULL; 2894 b->roworiented = PETSC_TRUE; 2895 2896 /* stuff used in block assembly */ 2897 b->barray = NULL; 2898 2899 /* stuff used for matrix vector multiply */ 2900 b->lvec = NULL; 2901 b->Mvctx = NULL; 2902 2903 /* stuff for MatGetRow() */ 2904 b->rowindices = NULL; 2905 b->rowvalues = NULL; 2906 b->getrowactive = PETSC_FALSE; 2907 2908 /* hash table stuff */ 2909 b->ht = NULL; 2910 b->hd = NULL; 2911 b->ht_size = 0; 2912 b->ht_flag = PETSC_FALSE; 2913 b->ht_fact = 0; 2914 b->ht_total_ct = 0; 2915 b->ht_insert_ct = 0; 2916 2917 /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */ 2918 b->ijonly = PETSC_FALSE; 2919 2920 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",MatConvert_MPIBAIJ_MPIAdj)); 2921 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",MatConvert_MPIBAIJ_MPIAIJ)); 2922 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",MatConvert_MPIBAIJ_MPISBAIJ)); 2923 #if defined(PETSC_HAVE_HYPRE) 2924 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_hypre_C",MatConvert_AIJ_HYPRE)); 2925 #endif 2926 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIBAIJ)); 2927 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIBAIJ)); 2928 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",MatMPIBAIJSetPreallocation_MPIBAIJ)); 2929 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",MatMPIBAIJSetPreallocationCSR_MPIBAIJ)); 2930 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIBAIJ)); 2931 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSetHashTableFactor_C",MatSetHashTableFactor_MPIBAIJ)); 2932 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_is_C",MatConvert_XAIJ_IS)); 2933 PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ)); 2934 2935 PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPIBAIJ matrix 1","Mat"); 2936 PetscCall(PetscOptionsName("-mat_use_hash_table","Use hash table to save time in constructing matrix","MatSetOption",&flg)); 2937 if (flg) { 2938 PetscReal fact = 1.39; 2939 PetscCall(MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE)); 2940 PetscCall(PetscOptionsReal("-mat_use_hash_table","Use hash table 
factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL)); 2941 if (fact <= 1.0) fact = 1.39; 2942 PetscCall(MatMPIBAIJSetHashTableFactor(B,fact)); 2943 PetscCall(PetscInfo(B,"Hash table Factor used %5.2g\n",(double)fact)); 2944 } 2945 PetscOptionsEnd(); 2946 PetscFunctionReturn(0); 2947 } 2948 2949 /*MC 2950 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 2951 2952 This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator, 2953 and MATMPIBAIJ otherwise. 2954 2955 Options Database Keys: 2956 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions() 2957 2958 Level: beginner 2959 2960 .seealso: `MatCreateBAIJ()`, `MATSEQBAIJ`, `MATMPIBAIJ`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()` 2961 M*/ 2962 2963 /*@C 2964 MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format 2965 (block compressed row). For good matrix assembly performance 2966 the user should preallocate the matrix storage by setting the parameters 2967 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2968 performance can be increased by more than a factor of 50. 2969 2970 Collective on Mat 2971 2972 Input Parameters: 2973 + B - the matrix 2974 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 2975 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 2976 . d_nz - number of block nonzeros per block row in diagonal portion of local 2977 submatrix (same for all local rows) 2978 . d_nnz - array containing the number of block nonzeros in the various block rows 2979 of the in diagonal portion of the local (possibly different for each block 2980 row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry and 2981 set it even if it is zero. 2982 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2983 submatrix (same for all local rows). 2984 - o_nnz - array containing the number of nonzeros in the various block rows of the 2985 off-diagonal portion of the local submatrix (possibly different for 2986 each block row) or NULL. 2987 2988 If the *_nnz parameter is given then the *_nz parameter is ignored 2989 2990 Options Database Keys: 2991 + -mat_block_size - size of the blocks to use 2992 - -mat_use_hash_table <fact> - set hash table factor 2993 2994 Notes: 2995 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2996 than it must be used on all processors that share the object for that argument. 2997 2998 Storage Information: 2999 For a square global matrix we define each processor's diagonal portion 3000 to be its local rows and the corresponding columns (a square submatrix); 3001 each processor's off-diagonal portion encompasses the remainder of the 3002 local matrix (a rectangular submatrix). 3003 3004 The user can specify preallocated storage for the diagonal part of 3005 the local submatrix with either d_nz or d_nnz (not both). Set 3006 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 3007 memory allocation. Likewise, specify preallocated storage for the 3008 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 3009 3010 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. 
In 3011 the figure below we depict these three local rows and all columns (0-11). 3012 3013 .vb 3014 0 1 2 3 4 5 6 7 8 9 10 11 3015 -------------------------- 3016 row 3 |o o o d d d o o o o o o 3017 row 4 |o o o d d d o o o o o o 3018 row 5 |o o o d d d o o o o o o 3019 -------------------------- 3020 .ve 3021 3022 Thus, any entries in the d locations are stored in the d (diagonal) 3023 submatrix, and any entries in the o locations are stored in the 3024 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3025 stored simply in the MATSEQBAIJ format for compressed row storage. 3026 3027 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3028 and o_nz should indicate the number of block nonzeros per row in the o matrix. 3029 In general, for PDE problems in which most nonzeros are near the diagonal, 3030 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3031 or you will get TERRIBLE performance; see the users' manual chapter on 3032 matrices. 3033 3034 You can call MatGetInfo() to get information on how effective the preallocation was; 3035 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3036 You can also run with the option -info and look for messages with the string 3037 malloc in them to see if additional memory allocation was needed. 3038 3039 Level: intermediate 3040 3041 .seealso: `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatMPIBAIJSetPreallocationCSR()`, `PetscSplitOwnership()` 3042 @*/ 3043 PetscErrorCode MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3044 { 3045 PetscFunctionBegin; 3046 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3047 PetscValidType(B,1); 3048 PetscValidLogicalCollectiveInt(B,bs,2); 3049 PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz)); 3050 PetscFunctionReturn(0); 3051 } 3052 3053 /*@C 3054 MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format 3055 (block compressed row). For good matrix assembly performance 3056 the user should preallocate the matrix storage by setting the parameters 3057 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3058 performance can be increased by more than a factor of 50. 3059 3060 Collective 3061 3062 Input Parameters: 3063 + comm - MPI communicator 3064 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3065 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3066 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3067 This value should be the same as the local size used in creating the 3068 y vector for the matrix-vector product y = Ax. 3069 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 3070 This value should be the same as the local size used in creating the 3071 x vector for the matrix-vector product y = Ax. 3072 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3073 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3074 . d_nz - number of nonzero blocks per block row in diagonal portion of local 3075 submatrix (same for all local rows) 3076 . 
d_nnz - array containing the number of nonzero blocks in the various block rows 3077 of the diagonal portion of the local submatrix (possibly different for each block 3078 row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry 3079 and set it even if it is zero. 3080 . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local 3081 submatrix (same for all local rows). 3082 - o_nnz - array containing the number of nonzero blocks in the various block rows of the 3083 off-diagonal portion of the local submatrix (possibly different for 3084 each block row) or NULL. 3085 3086 Output Parameter: 3087 . A - the matrix 3088 3089 Options Database Keys: 3090 + -mat_block_size - size of the blocks to use 3091 - -mat_use_hash_table <fact> - set hash table factor 3092 3093 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3094 MatXXXXSetPreallocation() paradigm instead of this routine directly. 3095 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3096 3097 Notes: 3098 If the *_nnz parameter is given then the *_nz parameter is ignored 3099 3100 A nonzero block is any block that has 1 or more nonzeros in it 3101 3102 The user MUST specify either the local or global matrix dimensions 3103 (possibly both). 3104 3105 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 3106 then it must be used on all processors that share the object for that argument. 3107 3108 Storage Information: 3109 For a square global matrix we define each processor's diagonal portion 3110 to be its local rows and the corresponding columns (a square submatrix); 3111 each processor's off-diagonal portion encompasses the remainder of the 3112 local matrix (a rectangular submatrix). 3113 3114 The user can specify preallocated storage for the diagonal part of 3115 the local submatrix with either d_nz or d_nnz (not both). Set 3116 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 3117 memory allocation. Likewise, specify preallocated storage for the 3118 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 3119 3120 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3121 the figure below we depict these three local rows and all columns (0-11). 3122 3123 .vb 3124 0 1 2 3 4 5 6 7 8 9 10 11 3125 -------------------------- 3126 row 3 |o o o d d d o o o o o o 3127 row 4 |o o o d d d o o o o o o 3128 row 5 |o o o d d d o o o o o o 3129 -------------------------- 3130 .ve 3131 3132 Thus, any entries in the d locations are stored in the d (diagonal) 3133 submatrix, and any entries in the o locations are stored in the 3134 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3135 stored simply in the MATSEQBAIJ format for compressed row storage. 3136 3137 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3138 and o_nz should indicate the number of block nonzeros per row in the o matrix. 3139 In general, for PDE problems in which most nonzeros are near the diagonal, 3140 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3141 or you will get TERRIBLE performance; see the users' manual chapter on 3142 matrices.
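   As an illustration only (a minimal sketch; the block size, local sizes, and preallocation counts below are arbitrary), a typical call sequence is
.vb
      Mat      A;
      PetscInt bs = 2, mblocks = 4;   /* 4 local block rows/columns of size 2 */
      PetscCall(MatCreateBAIJ(PETSC_COMM_WORLD,bs,mblocks*bs,mblocks*bs,PETSC_DETERMINE,PETSC_DETERMINE,3,NULL,1,NULL,&A));
      /* ... insert blocks with MatSetValuesBlocked() (or entries with MatSetValues()) ... */
      PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
      PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));
.ve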
3143 3144 Level: intermediate 3145 3146 .seealso: `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()` 3147 @*/ 3148 PetscErrorCode MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3149 { 3150 PetscMPIInt size; 3151 3152 PetscFunctionBegin; 3153 PetscCall(MatCreate(comm,A)); 3154 PetscCall(MatSetSizes(*A,m,n,M,N)); 3155 PetscCallMPI(MPI_Comm_size(comm,&size)); 3156 if (size > 1) { 3157 PetscCall(MatSetType(*A,MATMPIBAIJ)); 3158 PetscCall(MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz)); 3159 } else { 3160 PetscCall(MatSetType(*A,MATSEQBAIJ)); 3161 PetscCall(MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz)); 3162 } 3163 PetscFunctionReturn(0); 3164 } 3165 3166 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3167 { 3168 Mat mat; 3169 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 3170 PetscInt len=0; 3171 3172 PetscFunctionBegin; 3173 *newmat = NULL; 3174 PetscCall(MatCreate(PetscObjectComm((PetscObject)matin),&mat)); 3175 PetscCall(MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N)); 3176 PetscCall(MatSetType(mat,((PetscObject)matin)->type_name)); 3177 3178 mat->factortype = matin->factortype; 3179 mat->preallocated = PETSC_TRUE; 3180 mat->assembled = PETSC_TRUE; 3181 mat->insertmode = NOT_SET_VALUES; 3182 3183 a = (Mat_MPIBAIJ*)mat->data; 3184 mat->rmap->bs = matin->rmap->bs; 3185 a->bs2 = oldmat->bs2; 3186 a->mbs = oldmat->mbs; 3187 a->nbs = oldmat->nbs; 3188 a->Mbs = oldmat->Mbs; 3189 a->Nbs = oldmat->Nbs; 3190 3191 PetscCall(PetscLayoutReference(matin->rmap,&mat->rmap)); 3192 PetscCall(PetscLayoutReference(matin->cmap,&mat->cmap)); 3193 3194 a->size = oldmat->size; 3195 a->rank = oldmat->rank; 3196 a->donotstash = oldmat->donotstash; 3197 a->roworiented = oldmat->roworiented; 3198 a->rowindices = NULL; 3199 a->rowvalues = NULL; 3200 a->getrowactive = PETSC_FALSE; 3201 a->barray = NULL; 3202 a->rstartbs = oldmat->rstartbs; 3203 a->rendbs = oldmat->rendbs; 3204 a->cstartbs = oldmat->cstartbs; 3205 a->cendbs = oldmat->cendbs; 3206 3207 /* hash table stuff */ 3208 a->ht = NULL; 3209 a->hd = NULL; 3210 a->ht_size = 0; 3211 a->ht_flag = oldmat->ht_flag; 3212 a->ht_fact = oldmat->ht_fact; 3213 a->ht_total_ct = 0; 3214 a->ht_insert_ct = 0; 3215 3216 PetscCall(PetscArraycpy(a->rangebs,oldmat->rangebs,a->size+1)); 3217 if (oldmat->colmap) { 3218 #if defined(PETSC_USE_CTABLE) 3219 PetscCall(PetscTableCreateCopy(oldmat->colmap,&a->colmap)); 3220 #else 3221 PetscCall(PetscMalloc1(a->Nbs,&a->colmap)); 3222 PetscCall(PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt))); 3223 PetscCall(PetscArraycpy(a->colmap,oldmat->colmap,a->Nbs)); 3224 #endif 3225 } else a->colmap = NULL; 3226 3227 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 3228 PetscCall(PetscMalloc1(len,&a->garray)); 3229 PetscCall(PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt))); 3230 PetscCall(PetscArraycpy(a->garray,oldmat->garray,len)); 3231 } else a->garray = NULL; 3232 3233 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash)); 3234 PetscCall(VecDuplicate(oldmat->lvec,&a->lvec)); 3235 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec)); 3236 PetscCall(VecScatterCopy(oldmat->Mvctx,&a->Mvctx)); 3237 
PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx)); 3238 3239 PetscCall(MatDuplicate(oldmat->A,cpvalues,&a->A)); 3240 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A)); 3241 PetscCall(MatDuplicate(oldmat->B,cpvalues,&a->B)); 3242 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B)); 3243 PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist)); 3244 *newmat = mat; 3245 PetscFunctionReturn(0); 3246 } 3247 3248 /* Used for both MPIBAIJ and MPISBAIJ matrices */ 3249 PetscErrorCode MatLoad_MPIBAIJ_Binary(Mat mat,PetscViewer viewer) 3250 { 3251 PetscInt header[4],M,N,nz,bs,m,n,mbs,nbs,rows,cols,sum,i,j,k; 3252 PetscInt *rowidxs,*colidxs,rs,cs,ce; 3253 PetscScalar *matvals; 3254 3255 PetscFunctionBegin; 3256 PetscCall(PetscViewerSetUp(viewer)); 3257 3258 /* read in matrix header */ 3259 PetscCall(PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT)); 3260 PetscCheck(header[0] == MAT_FILE_CLASSID,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file"); 3261 M = header[1]; N = header[2]; nz = header[3]; 3262 PetscCheck(M >= 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%" PetscInt_FMT ") in file is negative",M); 3263 PetscCheck(N >= 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%" PetscInt_FMT ") in file is negative",N); 3264 PetscCheck(nz >= 0,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIBAIJ"); 3265 3266 /* set block sizes from the viewer's .info file */ 3267 PetscCall(MatLoad_Binary_BlockSizes(mat,viewer)); 3268 /* set local sizes if not set already */ 3269 if (mat->rmap->n < 0 && M == N) mat->rmap->n = mat->cmap->n; 3270 if (mat->cmap->n < 0 && M == N) mat->cmap->n = mat->rmap->n; 3271 /* set global sizes if not set already */ 3272 if (mat->rmap->N < 0) mat->rmap->N = M; 3273 if (mat->cmap->N < 0) mat->cmap->N = N; 3274 PetscCall(PetscLayoutSetUp(mat->rmap)); 3275 PetscCall(PetscLayoutSetUp(mat->cmap)); 3276 3277 /* check if the matrix sizes are correct */ 3278 PetscCall(MatGetSize(mat,&rows,&cols)); 3279 PetscCheck(M == rows && N == cols,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")",M,N,rows,cols); 3280 PetscCall(MatGetBlockSize(mat,&bs)); 3281 PetscCall(MatGetLocalSize(mat,&m,&n)); 3282 PetscCall(PetscLayoutGetRange(mat->rmap,&rs,NULL)); 3283 PetscCall(PetscLayoutGetRange(mat->cmap,&cs,&ce)); 3284 mbs = m/bs; nbs = n/bs; 3285 3286 /* read in row lengths and build row indices */ 3287 PetscCall(PetscMalloc1(m+1,&rowidxs)); 3288 PetscCall(PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT)); 3289 rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i]; 3290 PetscCall(MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer))); 3291 PetscCheck(sum == nz,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT,nz,sum); 3292 3293 /* read in column indices and matrix values */ 3294 PetscCall(PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals)); 3295 PetscCall(PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT)); 3296 
PetscCall(PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR)); 3297 3298 { /* preallocate matrix storage */ 3299 PetscBT bt; /* helper bit set to count diagonal nonzeros */ 3300 PetscHSetI ht; /* helper hash set to count off-diagonal nonzeros */ 3301 PetscBool sbaij,done; 3302 PetscInt *d_nnz,*o_nnz; 3303 3304 PetscCall(PetscBTCreate(nbs,&bt)); 3305 PetscCall(PetscHSetICreate(&ht)); 3306 PetscCall(PetscCalloc2(mbs,&d_nnz,mbs,&o_nnz)); 3307 PetscCall(PetscObjectTypeCompare((PetscObject)mat,MATMPISBAIJ,&sbaij)); 3308 for (i=0; i<mbs; i++) { 3309 PetscCall(PetscBTMemzero(nbs,bt)); 3310 PetscCall(PetscHSetIClear(ht)); 3311 for (k=0; k<bs; k++) { 3312 PetscInt row = bs*i + k; 3313 for (j=rowidxs[row]; j<rowidxs[row+1]; j++) { 3314 PetscInt col = colidxs[j]; 3315 if (!sbaij || col >= row) { 3316 if (col >= cs && col < ce) { 3317 if (!PetscBTLookupSet(bt,(col-cs)/bs)) d_nnz[i]++; 3318 } else { 3319 PetscCall(PetscHSetIQueryAdd(ht,col/bs,&done)); 3320 if (done) o_nnz[i]++; 3321 } 3322 } 3323 } 3324 } 3325 } 3326 PetscCall(PetscBTDestroy(&bt)); 3327 PetscCall(PetscHSetIDestroy(&ht)); 3328 PetscCall(MatMPIBAIJSetPreallocation(mat,bs,0,d_nnz,0,o_nnz)); 3329 PetscCall(MatMPISBAIJSetPreallocation(mat,bs,0,d_nnz,0,o_nnz)); 3330 PetscCall(PetscFree2(d_nnz,o_nnz)); 3331 } 3332 3333 /* store matrix values */ 3334 for (i=0; i<m; i++) { 3335 PetscInt row = rs + i, s = rowidxs[i], e = rowidxs[i+1]; 3336 PetscCall((*mat->ops->setvalues)(mat,1,&row,e-s,colidxs+s,matvals+s,INSERT_VALUES)); 3337 } 3338 3339 PetscCall(PetscFree(rowidxs)); 3340 PetscCall(PetscFree2(colidxs,matvals)); 3341 PetscCall(MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY)); 3342 PetscCall(MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY)); 3343 PetscFunctionReturn(0); 3344 } 3345 3346 PetscErrorCode MatLoad_MPIBAIJ(Mat mat,PetscViewer viewer) 3347 { 3348 PetscBool isbinary; 3349 3350 PetscFunctionBegin; 3351 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary)); 3352 PetscCheck(isbinary,PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)mat)->type_name); 3353 PetscCall(MatLoad_MPIBAIJ_Binary(mat,viewer)); 3354 PetscFunctionReturn(0); 3355 } 3356 3357 /*@ 3358 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 
3359 3360 Input Parameters: 3361 + mat - the matrix 3362 - fact - factor 3363 3364 Not Collective, each process can use a different factor 3365 3366 Level: advanced 3367 3368 Notes: 3369 This can also be set by the command line option: -mat_use_hash_table <fact> 3370 3371 .seealso: `MatSetOption()` 3372 @*/ 3373 PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact) 3374 { 3375 PetscFunctionBegin; 3376 PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact)); 3377 PetscFunctionReturn(0); 3378 } 3379 3380 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact) 3381 { 3382 Mat_MPIBAIJ *baij; 3383 3384 PetscFunctionBegin; 3385 baij = (Mat_MPIBAIJ*)mat->data; 3386 baij->ht_fact = fact; 3387 PetscFunctionReturn(0); 3388 } 3389 3390 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 3391 { 3392 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 3393 PetscBool flg; 3394 3395 PetscFunctionBegin; 3396 PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIBAIJ,&flg)); 3397 PetscCheck(flg,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIBAIJ matrix as input"); 3398 if (Ad) *Ad = a->A; 3399 if (Ao) *Ao = a->B; 3400 if (colmap) *colmap = a->garray; 3401 PetscFunctionReturn(0); 3402 } 3403 3404 /* 3405 Special version for direct calls from Fortran (to eliminate two function call overheads 3406 */ 3407 #if defined(PETSC_HAVE_FORTRAN_CAPS) 3408 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED 3409 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 3410 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked 3411 #endif 3412 3413 /*@C 3414 MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked() 3415 3416 Collective on Mat 3417 3418 Input Parameters: 3419 + mat - the matrix 3420 . min - number of input rows 3421 . im - input rows 3422 . nin - number of input columns 3423 . in - input columns 3424 . v - numerical values input 3425 - addvin - INSERT_VALUES or ADD_VALUES 3426 3427 Notes: 3428 This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse. 
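   For reference, this routine is only a Fortran-callable shortcut; the standard path it duplicates is simply the
   following (a minimal sketch of the usual call, not additional functionality):
.vb
   PetscCall(MatSetValuesBlocked(mat,m,im,n,in,v,addv));
.ve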
   Level: advanced

.seealso: `MatSetValuesBlocked()`
@*/
PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
{
  /* convert input arguments to C version */
  Mat        mat  = *matin;
  PetscInt   m    = *min, n = *nin;
  InsertMode addv = *addvin;

  Mat_MPIBAIJ     *baij       = (Mat_MPIBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray     = baij->barray;
  PetscBool       roworiented = baij->roworiented;
  PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  /* tasks normally handled by MatSetValuesBlocked() */
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
  else PetscCheck(mat->insertmode == addv,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
  PetscCheck(!mat->factortype,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
  if (mat->assembled) {
    mat->was_assembled = PETSC_TRUE;
    mat->assembled     = PETSC_FALSE;
  }
  PetscCall(PetscLogEventBegin(MAT_SetValues,mat,0,0,0));

  if (!barray) {
    PetscCall(PetscMalloc1(bs2,&barray));
    baij->barray = barray;
  }

  if (roworiented) stepval = (n-1)*bs;
  else stepval = (m-1)*bs;

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
    PetscCheck(im[i] < baij->Mbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],baij->Mbs-1);
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++ = *value++;
            }
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {
          col = in[j] - cstart;
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]));
        } else if (in[j] < 0) continue;
        else {
          PetscCheck(in[j] < baij->Nbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %" PetscInt_FMT " max %" PetscInt_FMT,in[j],baij->Nbs-1);
          if (mat->was_assembled) {
            if (!baij->colmap) {
              PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
            }

#if defined(PETSC_USE_DEBUG)
#if defined(PETSC_USE_CTABLE)
            { PetscInt data;
              PetscCall(PetscTableFind(baij->colmap,in[j]+1,&data));
              PetscCheck((data - 1) % bs == 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            PetscCheck((baij->colmap[in[j]] - 1) % bs == 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscTableFind(baij->colmap,in[j]+1,&col));
            col = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              PetscCall(MatDisAssemble_MPIBAIJ(mat));
              col = in[j];
            }
          } else col = in[j];
3525 PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j])); 3526 } 3527 } 3528 } else { 3529 if (!baij->donotstash) { 3530 if (roworiented) { 3531 PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i)); 3532 } else { 3533 PetscCall(MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i)); 3534 } 3535 } 3536 } 3537 } 3538 3539 /* task normally handled by MatSetValuesBlocked() */ 3540 PetscCall(PetscLogEventEnd(MAT_SetValues,mat,0,0,0)); 3541 PetscFunctionReturn(0); 3542 } 3543 3544 /*@ 3545 MatCreateMPIBAIJWithArrays - creates a MPI BAIJ matrix using arrays that contain in standard block 3546 CSR format the local rows. 3547 3548 Collective 3549 3550 Input Parameters: 3551 + comm - MPI communicator 3552 . bs - the block size, only a block size of 1 is supported 3553 . m - number of local rows (Cannot be PETSC_DECIDE) 3554 . n - This value should be the same as the local size used in creating the 3555 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3556 calculated if N is given) For square matrices n is almost always m. 3557 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3558 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3559 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that rowth block row of the matrix 3560 . j - column indices 3561 - a - matrix values 3562 3563 Output Parameter: 3564 . mat - the matrix 3565 3566 Level: intermediate 3567 3568 Notes: 3569 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3570 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3571 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3572 3573 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 3574 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 3575 block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory 3576 with column-major ordering within blocks. 3577 3578 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 
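   For example (an illustrative sketch, not drawn from the routine itself): with bs=2 and a single local block row
   whose two nonzero blocks sit in block columns 0 and 2, with block entries
.vb
      | 1 2 |        | 5 6 |
      | 3 4 |  and   | 7 8 |
.ve
   one would pass i[] = {0,2}, j[] = {0,2}, and a[] = {1,3,2,4, 5,7,6,8}, i.e. each block's first column followed by
   its second column.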
.seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`
@*/
PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscFunctionBegin;
  PetscCheck(!i[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  PetscCheck(m >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCall(MatCreate(comm,mat));
  PetscCall(MatSetSizes(*mat,m,n,M,N));
  PetscCall(MatSetType(*mat,MATMPIBAIJ));
  PetscCall(MatSetBlockSize(*mat,bs));
  PetscCall(MatSetUp(*mat));
  PetscCall(MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_FALSE));
  PetscCall(MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a));
  PetscCall(MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_TRUE));
  PetscFunctionReturn(0);
}

PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscInt    m,N,i,rstart,nnz,Ii,bs,cbs;
  PetscInt    *indx;
  PetscScalar *values;

  PetscFunctionBegin;
  PetscCall(MatGetSize(inmat,&m,&N));
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inmat->data;
    PetscInt    *dnz,*onz,mbs,Nbs,nbs;
    PetscInt    *bindx,rmax=a->rmax,j;
    PetscMPIInt rank,size;

    PetscCall(MatGetBlockSizes(inmat,&bs,&cbs));
    mbs = m/bs; Nbs = N/cbs;
    if (n == PETSC_DECIDE) {
      PetscCall(PetscSplitOwnershipBlock(comm,cbs,&n,&N));
    }
    nbs = n/cbs;

    PetscCall(PetscMalloc1(rmax,&bindx));
    MatPreallocateBegin(comm,mbs,nbs,dnz,onz); /* inline function, output __end and __rstart are used below */

    PetscCallMPI(MPI_Comm_rank(comm,&rank));
    PetscCallMPI(MPI_Comm_size(comm,&size));
    if (rank == size-1) {
      /* Check sum(nbs) = Nbs */
      PetscCheck(__end == Nbs,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT,__end,Nbs);
    }

    rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */
    for (i=0; i<mbs; i++) {
      PetscCall(MatGetRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL)); /* non-blocked nnz and indx */
      nnz = nnz/bs;
      for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
      PetscCall(MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz));
      PetscCall(MatRestoreRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL));
    }
    PetscCall(PetscFree(bindx));

    PetscCall(MatCreate(comm,outmat));
    PetscCall(MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE));
    PetscCall(MatSetBlockSizes(*outmat,bs,cbs));
    PetscCall(MatSetType(*outmat,MATBAIJ));
    PetscCall(MatSeqBAIJSetPreallocation(*outmat,bs,0,dnz));
    PetscCall(MatMPIBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz));
    MatPreallocateEnd(dnz,onz);
    PetscCall(MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE));
  }

  /* numeric phase */
  PetscCall(MatGetBlockSizes(inmat,&bs,&cbs));
  PetscCall(MatGetOwnershipRange(*outmat,&rstart,NULL));

  for (i=0; i<m; i++) {
    PetscCall(MatGetRow_SeqBAIJ(inmat,i,&nnz,&indx,&values));
    Ii = i + rstart;
    PetscCall(MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES));
PetscCall(MatRestoreRow_SeqBAIJ(inmat,i,&nnz,&indx,&values)); 3659 } 3660 PetscCall(MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY)); 3661 PetscCall(MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY)); 3662 PetscFunctionReturn(0); 3663 } 3664
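/*
   For context: the MatPreallocateBegin()/MatPreallocateSet()/MatPreallocateEnd() pattern used in the symbolic phase of
   MatCreateMPIMatConcatenateSeqMat_MPIBAIJ() above has the following general shape (a minimal sketch only; the loop
   bounds and the column data are placeholders, not code from this file):

     PetscInt *dnz,*onz;
     MatPreallocateBegin(comm,local_block_rows,local_block_cols,dnz,onz);
     for (i=0; i<local_block_rows; i++) {
       // ncols_i/cols_i: the block-column pattern of block row i, gathered by the caller
       PetscCall(MatPreallocateSet(i+__rstart,ncols_i,cols_i,dnz,onz));
     }
     PetscCall(MatMPIBAIJSetPreallocation(C,bs,0,dnz,0,onz));
     MatPreallocateEnd(dnz,onz);

   MatPreallocateBegin() defines the helper variables (including __rstart and __end) in the enclosing scope, and
   MatPreallocateEnd() frees dnz/onz, which is why the preallocation call must sit between the two macros.
*/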