#include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/
#include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
#include <../src/mat/impls/sbaij/seq/sbaij.h>
#include <petscblaslapack.h>

#if defined(PETSC_HAVE_ELEMENTAL)
PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
#endif
#if defined(PETSC_HAVE_SCALAPACK)
PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*);
#endif

/* This could be moved to matimpl.h */
/*
   Preallocate B with the union of the nonzero patterns of the nm matrices X[],
   using an intermediate MATPREALLOCATOR matrix.  If symm[i] is true, the
   pattern of X[i] is also inserted transposed (X[i] stores only one triangle
   of a symmetric matrix).  Every X[i] must share B's row layout.  fill is
   forwarded to MatPreallocatorPreallocate().
*/
static PetscErrorCode MatPreallocateWithMats_Private(Mat B, PetscInt nm, Mat X[], PetscBool symm[], PetscBool fill)
{
  Mat       preallocator;
  PetscInt  r,rstart,rend;
  PetscInt  bs,i,m,n,M,N;
  PetscBool cong = PETSC_TRUE;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidLogicalCollectiveInt(B,nm,2);
  for (i = 0; i < nm; i++) {
    PetscValidHeaderSpecific(X[i],MAT_CLASSID,3);
    PetscCall(PetscLayoutCompare(B->rmap,X[i]->rmap,&cong));
    PetscCheck(cong,PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"Not for different layouts");
  }
  PetscValidLogicalCollectiveBool(B,fill,5);
  PetscCall(MatGetBlockSize(B,&bs));
  PetscCall(MatGetSize(B,&M,&N));
  PetscCall(MatGetLocalSize(B,&m,&n));
  /* mirror B's layout in a preallocator matrix, then insert the patterns of all X[i] */
  PetscCall(MatCreate(PetscObjectComm((PetscObject)B),&preallocator));
  PetscCall(MatSetType(preallocator,MATPREALLOCATOR));
  PetscCall(MatSetBlockSize(preallocator,bs));
  PetscCall(MatSetSizes(preallocator,m,n,M,N));
  PetscCall(MatSetUp(preallocator));
  PetscCall(MatGetOwnershipRange(preallocator,&rstart,&rend));
  for (r = rstart; r < rend; ++r) {
    PetscInt          ncols;
    const PetscInt    *row;
    const PetscScalar *vals;

    for (i = 0; i < nm; i++) {
      PetscCall(MatGetRow(X[i],r,&ncols,&row,&vals));
      PetscCall(MatSetValues(preallocator,1,&r,ncols,row,vals,INSERT_VALUES));
      if (symm && symm[i]) {
        /* X[i] stores only one triangle: also insert the transposed entries */
        PetscCall(MatSetValues(preallocator,ncols,row,1,&r,vals,INSERT_VALUES));
      }
      PetscCall(MatRestoreRow(X[i],r,&ncols,&row,&vals));
    }
  }
  PetscCall(MatAssemblyBegin(preallocator,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(preallocator,MAT_FINAL_ASSEMBLY));
  PetscCall(MatPreallocatorPreallocate(preallocator,fill,B));
  PetscCall(MatDestroy(&preallocator));
  PetscFunctionReturn(0);
}

/*
   Fallback conversion of an MPISBAIJ matrix (which stores only the upper
   triangle) to a full matrix of type newtype, row by row through
   MatGetRow()/MatSetValues(); the stored triangle is mirrored into the
   lower triangle of the result.
*/
PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Basic(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat      B;
  PetscInt r;

  PetscFunctionBegin;
  if (reuse != MAT_REUSE_MATRIX) {
    PetscBool symm = PETSC_TRUE,isdense;
    PetscInt  bs;

    PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B));
    PetscCall(MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N));
    PetscCall(MatSetType(B,newtype));
    PetscCall(MatGetBlockSize(A,&bs));
    PetscCall(MatSetBlockSize(B,bs));
    PetscCall(PetscLayoutSetUp(B->rmap));
    PetscCall(PetscLayoutSetUp(B->cmap));
    PetscCall(PetscObjectTypeCompareAny((PetscObject)B,&isdense,MATSEQDENSE,MATMPIDENSE,MATSEQDENSECUDA,""));
    if (!isdense) {
      /* sparse target: preallocate with the symmetrized pattern of A */
      PetscCall(MatGetRowUpperTriangular(A));
      PetscCall(MatPreallocateWithMats_Private(B,1,&A,&symm,PETSC_TRUE));
      PetscCall(MatRestoreRowUpperTriangular(A));
    } else {
      PetscCall(MatSetUp(B));
    }
  } else {
    B = *newmat;
    PetscCall(MatZeroEntries(B));
  }

  /* copy the stored (upper) triangle and mirror it into the lower triangle */
  PetscCall(MatGetRowUpperTriangular(A));
  for (r = A->rmap->rstart; r < A->rmap->rend; r++) {
    PetscInt          ncols;
    const PetscInt    *row;
    const PetscScalar *vals;

    PetscCall(MatGetRow(A,r,&ncols,&row,&vals));
    PetscCall(MatSetValues(B,1,&r,ncols,row,vals,INSERT_VALUES));
#if defined(PETSC_USE_COMPLEX)
    if (A->hermitian) {
      PetscInt i;
      for (i = 0; i < ncols; i++) {
        /* Hermitian case: mirrored entries are conjugated */
        PetscCall(MatSetValue(B,row[i],r,PetscConj(vals[i]),INSERT_VALUES));
      }
    } else {
      PetscCall(MatSetValues(B,ncols,row,1,&r,vals,INSERT_VALUES));
    }
#else
    PetscCall(MatSetValues(B,ncols,row,1,&r,vals,INSERT_VALUES));
#endif
    PetscCall(MatRestoreRow(A,r,&ncols,&row,&vals));
  }
  PetscCall(MatRestoreRowUpperTriangular(A));
  PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));

  if (reuse == MAT_INPLACE_MATRIX) {
    /* replace A's internals with B's; B's shell is destroyed */
    PetscCall(MatHeaderReplace(A,&B));
  } else {
    *newmat = B;
  }
  PetscFunctionReturn(0);
}

/* Save a copy of the current values of both the diagonal (A) and off-diagonal (B) parts */
PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ *aij = (Mat_MPISBAIJ*)mat->data;

  PetscFunctionBegin;
  PetscCall(MatStoreValues(aij->A));
  PetscCall(MatStoreValues(aij->B));
  PetscFunctionReturn(0);
}

/* Restore the values previously saved with MatStoreValues_MPISBAIJ() */
PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ *aij = (Mat_MPISBAIJ*)mat->data;

  PetscFunctionBegin;
  PetscCall(MatRetrieveValues(aij->A));
  PetscCall(MatRetrieveValues(aij->B));
  PetscFunctionReturn(0);
}

/*
   Insert/add the scalar entry (row,col) (local point indices) into the local
   "diagonal" SeqSBAIJ part A; (orow,ocol) are the global indices, used only
   for error messages.  Relies on the caller (MatSetValues_MPISBAIJ) declaring
   the working variables brow,bcol,rp,ap,rmax,nrow,ridx,cidx,low,high,t,_i,N,bap
   and the array aliases aimax,ai,ailen,aj,aa.  Performs a short binary search
   followed by a linear scan; may reallocate the row via MatSeqXAIJReallocateAIJ.
*/
#define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,orow,ocol) \
  { \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else                    *bap  = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    PetscCheck(a->nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp+_i+1,rp+_i,N-_i+1)); \
    PetscCall(PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1))); \
    PetscCall(PetscArrayzero(ap+bs2*_i,bs2)); \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
    A->nonzerostate++;\
a_noinsert:; \
    ailen[brow] = nrow; \
  }

/*
   Same as MatSetValues_SeqSBAIJ_A_Private() but targets the off-diagonal
   SeqBAIJ part B; rp/ap/rmax/nrow are rebound to B's arrays (bimax,bi,bilen,bj,ba).
*/
#define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,orow,ocol) \
  { \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else                    *bap  = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    PetscCheck(b->nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp+_i+1,rp+_i,N-_i+1)); \
    PetscCall(PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1))); \
    PetscCall(PetscArrayzero(ap+bs2*_i,bs2)); \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
    B->nonzerostate++;\
b_noinsert:; \
    bilen[brow] = nrow; \
  }

/* Only add/insert a(i,j) with i<=j (blocks).
   Any a(i,j) with i>j input by user is ignored or generates an error
*/
PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ *baij        = (Mat_MPISBAIJ*)mat->data;
  MatScalar    value;
  PetscBool    roworiented  = baij->roworiented;
  PetscInt     i,j,row,col;
  PetscInt     rstart_orig  = mat->rmap->rstart;
  PetscInt     rend_orig    = mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt     cend_orig    = mat->cmap->rend,bs=mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat          A      = baij->A;
  Mat_SeqSBAIJ *a     = (Mat_SeqSBAIJ*)(A)->data;
  PetscInt     *aimax = a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar    *aa    = a->a;

  Mat         B      = baij->B;
  Mat_SeqBAIJ *b     = (Mat_SeqBAIJ*)(B)->data;
  PetscInt    *bimax = b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba    = b->a;

  PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar *ap,*bap;

  /* for stash */
  PetscInt  n_loc, *in_loc = NULL;
  MatScalar *v_loc = NULL;

  PetscFunctionBegin;
  if (!baij->donotstash) {
    /* grow the scratch buffers used to stash off-process rows, if needed */
    if (n > baij->n_loc) {
      PetscCall(PetscFree(baij->in_loc));
      PetscCall(PetscFree(baij->v_loc));
      PetscCall(PetscMalloc1(n,&baij->in_loc));
      PetscCall(PetscMalloc1(n,&baij->v_loc));

      baij->n_loc = n;
    }
    in_loc = baij->in_loc;
    v_loc  = baij->v_loc;
  }

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
    PetscCheck(im[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1);
    if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
      row = im[i] - rstart_orig;                     /* local row index */
      for (j=0; j<n; j++) {
        if (im[i]/bs > in[j]/bs) {
          if (a->ignore_ltriangular) {
            continue; /* ignore lower triangular blocks */
          } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
        }
        if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */
          col  = in[j] - cstart_orig;                    /* local col index */
          brow = row/bs; bcol = col/bs;
          if (brow > bcol) continue; /* ignore lower triangular blocks of A */
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,im[i],in[j]);
          /* PetscCall(MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv)); */
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[j],mat->cmap->N-1);
          /* off-diag entry (B) */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
            }
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscTableFind(baij->colmap,in[j]/bs + 1,&col));
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
              /* new off-diagonal block column: revert to unassembled state */
              PetscCall(MatDisAssemble_MPISBAIJ(mat));
              col  =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B    = baij->B;
              b    = (Mat_SeqBAIJ*)(B)->data;
              bimax= b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba   = b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,im[i],in[j]);
          /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */
        }
      }
    } else { /* off processor entry */
      PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        n_loc          = 0;
        for (j=0; j<n; j++) {
          if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
          in_loc[n_loc] = in[j];
          if (roworiented) {
            v_loc[n_loc] = v[i*n+j];
          } else {
            v_loc[n_loc] = v[j*m+i];
          }
          n_loc++;
        }
        PetscCall(MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE));
      }
    }
  }
  PetscFunctionReturn(0);
}

/*
   Insert/add one bs x bs block at block position (row,col) of the local
   SeqSBAIJ part; (orow,ocol) are the global block indices for error messages.
   v holds bs*bs values interpreted row- or column-oriented per a->roworiented.
   Blocks with col < row belong to the unstored lower triangle and are either
   ignored or rejected depending on a->ignore_ltriangular.
*/
static inline PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
{
  Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
  PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
  PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
  PetscInt          *aj  =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
  PetscBool         roworiented=a->roworiented;
  const PetscScalar *value = v;
  MatScalar         *ap,*aa = a->a,*bap;

  PetscFunctionBegin;
  if (col < row) {
    if (a->ignore_ltriangular) PetscFunctionReturn(0); /* ignore lower triangular block */
    else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
  }
  rp    = aj + ai[row];
  ap    = aa + bs2*ai[row];
  rmax  = imax[row];
  nrow  = ailen[row];
  value = v;
  low   = 0;
  high  = nrow;

  while (high-low > 7) {
    t = (low+high)/2;
    if (rp[t] > col) high = t;
    else             low  = t;
  }
  for (i=low; i<high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      /* block already present: accumulate or overwrite in place */
      bap = ap + bs2*i;
      if (roworiented) {
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] += *value++;
            }
          }
        } else {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] = *value++;
            }
          }
        }
      } else {
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++) {
            for (jj=0; jj<bs; jj++) {
              *bap++ += *value++;
            }
          }
        } else {
          for (ii=0; ii<bs; ii++) {
            for (jj=0; jj<bs; jj++) {
              *bap++ = *value++;
            }
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2;
  PetscCheck(nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new block index nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
  N = nrow++ - 1; high++;
  /* shift up all the later entries in this row */
  PetscCall(PetscArraymove(rp+i+1,rp+i,N-i+1));
  PetscCall(PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1)));
  rp[i] = col;
  bap   = ap + bs2*i;
  if (roworiented) {
    for (ii=0; ii<bs; ii++) {
      for (jj=ii; jj<bs2; jj+=bs) {
        bap[jj] = *value++;
      }
    }
  } else {
    for (ii=0; ii<bs; ii++) {
      for (jj=0; jj<bs; jj++) {
        *bap++ = *value++;
      }
    }
  }
noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(0);
}

/*
   This routine is exactly duplicated in mpibaij.c
*/
static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
{
  Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
  PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
  PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
  PetscInt          *aj  =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
  PetscBool         roworiented=a->roworiented;
  const PetscScalar *value = v;
  MatScalar         *ap,*aa = a->a,*bap;

  PetscFunctionBegin;
  rp    = aj + ai[row];
  ap    = aa + bs2*ai[row];
  rmax  = imax[row];
  nrow  = ailen[row];
  low   = 0;
  high  = nrow;
  value = v;
  while (high-low > 7) {
    t = (low+high)/2;
    if (rp[t] > col) high = t;
    else             low  = t;
  }
  for (i=low; i<high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      /* block already present: accumulate or overwrite in place */
      bap = ap + bs2*i;
      if (roworiented) {
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] += *value++;
            }
          }
        } else {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] = *value++;
            }
          }
        }
      } else {
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++,value+=bs) {
            for (jj=0; jj<bs; jj++) {
              bap[jj] += value[jj];
            }
            bap += bs;
          }
        } else {
          for (ii=0; ii<bs; ii++,value+=bs) {
            for (jj=0; jj<bs; jj++) {
              bap[jj] = value[jj];
            }
            bap += bs;
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2;
  PetscCheck(nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
  N = nrow++ - 1; high++;
  /* shift up all the later entries in this row */
  PetscCall(PetscArraymove(rp+i+1,rp+i,N-i+1));
  PetscCall(PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1)));
  rp[i] = col;
  bap   = ap + bs2*i;
  if (roworiented) {
    for (ii=0; ii<bs; ii++) {
      for (jj=ii; jj<bs2; jj+=bs) {
        bap[jj] = *value++;
      }
    }
  } else {
    for (ii=0; ii<bs; ii++) {
      for (jj=0; jj<bs; jj++) {
        *bap++ = *value++;
      }
    }
  }
noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(0);
}

/*
    This routine could be optimized by removing the need for the block copy below and passing stride information
  to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ()
*/
PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ    *baij       = (Mat_MPISBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray     = baij->barray;
  PetscBool       roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular;
  PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    /* lazily allocate the bs2 scratch block used to reorder input values */
    PetscCall(PetscMalloc1(bs2,&barray));
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
    PetscCheck(im[i] < baij->Mbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT,im[i],baij->Mbs-1);
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        if (im[i] > in[j]) {
          if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
          else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
        }
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*) v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*) v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++ = *value++;
            }
          }
          barray -=bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {
          col = in[j] - cstart;
          PetscCall(MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]));
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < baij->Nbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT,in[j],baij->Nbs-1);
          if (mat->was_assembled) {
            if (!baij->colmap) {
              PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
            }

#if defined(PETSC_USE_DEBUG)
#if defined(PETSC_USE_CTABLE)
            { PetscInt data;
              PetscCall(PetscTableFind(baij->colmap,in[j]+1,&data));
              PetscCheck((data - 1) % bs == 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            PetscCheck((baij->colmap[in[j]] - 1) % bs == 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscTableFind(baij->colmap,in[j]+1,&col));
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              /* new off-diagonal block column: revert to unassembled state */
              PetscCall(MatDisAssemble_MPISBAIJ(mat));
              col = in[j];
            }
          } else col = in[j];
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]));
        }
      }
    } else {
      PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        /* off-process block row: stash for exchange during assembly */
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i));
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

/*
   Fetch entries (idxm[i],idxn[j]) into v (row oriented); only rows owned by
   this process are supported.  Off-diagonal columns are looked up through the
   colmap; columns absent from the local nonzero structure yield 0.0.
*/
PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
  PetscInt     bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt     bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          col = idxn[j] - bscstart;
          PetscCall(MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j));
        } else {
          if (!baij->colmap) {
            PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
          }
#if defined(PETSC_USE_CTABLE)
          PetscCall(PetscTableFind(baij->colmap,idxn[j]/bs+1,&data));
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          /* block column not stored locally: the entry is a structural zero */
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col = data + idxn[j]%bs;
PetscCall(MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j)); 655 } 656 } 657 } 658 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 659 } 660 PetscFunctionReturn(0); 661 } 662 663 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm) 664 { 665 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 666 PetscReal sum[2],*lnorm2; 667 668 PetscFunctionBegin; 669 if (baij->size == 1) { 670 PetscCall(MatNorm(baij->A,type,norm)); 671 } else { 672 if (type == NORM_FROBENIUS) { 673 PetscCall(PetscMalloc1(2,&lnorm2)); 674 PetscCall(MatNorm(baij->A,type,lnorm2)); 675 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++; /* squar power of norm(A) */ 676 PetscCall(MatNorm(baij->B,type,lnorm2)); 677 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--; /* squar power of norm(B) */ 678 PetscCall(MPIU_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat))); 679 *norm = PetscSqrtReal(sum[0] + 2*sum[1]); 680 PetscCall(PetscFree(lnorm2)); 681 } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */ 682 Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data; 683 Mat_SeqBAIJ *bmat=(Mat_SeqBAIJ*)baij->B->data; 684 PetscReal *rsum,*rsum2,vabs; 685 PetscInt *jj,*garray=baij->garray,rstart=baij->rstartbs,nz; 686 PetscInt brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs; 687 MatScalar *v; 688 689 PetscCall(PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2)); 690 PetscCall(PetscArrayzero(rsum,mat->cmap->N)); 691 /* Amat */ 692 v = amat->a; jj = amat->j; 693 for (brow=0; brow<mbs; brow++) { 694 grow = bs*(rstart + brow); 695 nz = amat->i[brow+1] - amat->i[brow]; 696 for (bcol=0; bcol<nz; bcol++) { 697 gcol = bs*(rstart + *jj); jj++; 698 for (col=0; col<bs; col++) { 699 for (row=0; row<bs; row++) { 700 vabs = PetscAbsScalar(*v); v++; 701 rsum[gcol+col] += vabs; 702 /* non-diagonal block */ 703 if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs; 704 } 705 } 706 } 707 PetscCall(PetscLogFlops(nz*bs*bs)); 708 } 709 /* 
Bmat */ 710 v = bmat->a; jj = bmat->j; 711 for (brow=0; brow<mbs; brow++) { 712 grow = bs*(rstart + brow); 713 nz = bmat->i[brow+1] - bmat->i[brow]; 714 for (bcol=0; bcol<nz; bcol++) { 715 gcol = bs*garray[*jj]; jj++; 716 for (col=0; col<bs; col++) { 717 for (row=0; row<bs; row++) { 718 vabs = PetscAbsScalar(*v); v++; 719 rsum[gcol+col] += vabs; 720 rsum[grow+row] += vabs; 721 } 722 } 723 } 724 PetscCall(PetscLogFlops(nz*bs*bs)); 725 } 726 PetscCall(MPIU_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat))); 727 *norm = 0.0; 728 for (col=0; col<mat->cmap->N; col++) { 729 if (rsum2[col] > *norm) *norm = rsum2[col]; 730 } 731 PetscCall(PetscFree2(rsum,rsum2)); 732 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet"); 733 } 734 PetscFunctionReturn(0); 735 } 736 737 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode) 738 { 739 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 740 PetscInt nstash,reallocs; 741 742 PetscFunctionBegin; 743 if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 744 745 PetscCall(MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range)); 746 PetscCall(MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs)); 747 PetscCall(MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs)); 748 PetscCall(PetscInfo(mat,"Stash has %" PetscInt_FMT " entries,uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs)); 749 PetscCall(MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs)); 750 PetscCall(PetscInfo(mat,"Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs)); 751 PetscFunctionReturn(0); 752 } 753 754 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode) 755 { 756 Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data; 757 Mat_SeqSBAIJ *a =(Mat_SeqSBAIJ*)baij->A->data; 758 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 759 PetscInt *row,*col; 760 PetscBool other_disassembled; 761 PetscMPIInt n; 762 PetscBool 
r1,r2,r3;
  MatScalar    *val;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!baij->donotstash && !mat->nooffprocentries) {
    /* drain the scalar stash: each received message holds (row,col,val) triples */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg));
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg));
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        PetscCall(MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->bstash));

    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  PetscCall(MatAssemblyBegin(baij->A,mode));
  PetscCall(MatAssemblyEnd(baij->A,mode));
  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    PetscCall(MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) {
      PetscCall(MatDisAssemble_MPISBAIJ(mat));
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    PetscCall(MatSetUpMultiply_MPISBAIJ(mat)); /* setup Mvctx and sMvctx */
  }
  PetscCall(MatAssemblyBegin(baij->B,mode));
  PetscCall(MatAssemblyEnd(baij->B,mode));

  PetscCall(PetscFree2(baij->rowvalues,baij->rowindices));

  baij->rowvalues = NULL;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat)));
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
#include <petscdraw.h>
/*
   Viewer backend shared by the ASCII, draw and socket viewers.  For the
   detailed/info ASCII formats only statistics are printed; otherwise the
   whole matrix is gathered onto rank 0 and viewed sequentially.
*/
static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
  PetscInt          bs    = mat->rmap->bs;
  PetscMPIInt       rank  = baij->rank;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer,&format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-rank statistics only; no matrix entries are printed */
      MatInfo info;
      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank));
      PetscCall(MatGetInfo(mat,MAT_LOCAL,&info));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(double)info.memory));
      PetscCall(MatGetInfo(baij->A,MAT_LOCAL,&info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used));
      PetscCall(MatGetInfo(baij->B,MAT_LOCAL,&info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(baij->Mvctx,viewer));
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscCall(PetscViewerASCIIPrintf(viewer,"  block size is %" PetscInt_FMT "\n",bs));
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer,0,&draw));
    PetscCall(PetscDrawIsNull(draw,&isnull));
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    Mat_SeqSBAIJ *Aloc;
    Mat_SeqBAIJ  *Bloc;
    PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar    *a;
    const char   *matname;

    /* Should this be the same type as mat? */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)mat),&A));
    if (rank == 0) {
      PetscCall(MatSetSizes(A,M,N,M,N));
    } else {
      PetscCall(MatSetSizes(A,0,0,M,N));
    }
    PetscCall(MatSetType(A,MATMPISBAIJ));
    PetscCall(MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL));
    PetscCall(MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE));
    PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)A));

    /* copy over the A part */
    Aloc = (Mat_SeqSBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    PetscCall(PetscMalloc1(bs,&rvals));

    for (i=0; i<mbs; i++) {
      /* rvals holds the bs consecutive global point rows of block row i */
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          PetscCall(MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    /* copy over the B part */
    Bloc = (Mat_SeqBAIJ*)baij->B->data;
    ai   = Bloc->i; aj = Bloc->j; a = Bloc->a;
    for (i=0; i<mbs; i++) {

      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          /* B entries may lie in the lower triangle globally, so use the BAIJ setter */
          PetscCall(MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    PetscCall(PetscFree(rvals));
    PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
PetscCall(PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer)); 962 PetscCall(PetscObjectGetName((PetscObject)mat,&matname)); 963 if (rank == 0) { 964 PetscCall(PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,matname)); 965 PetscCall(MatView_SeqSBAIJ(((Mat_MPISBAIJ*)(A->data))->A,sviewer)); 966 } 967 PetscCall(PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer)); 968 PetscCall(PetscViewerFlush(viewer)); 969 PetscCall(MatDestroy(&A)); 970 } 971 PetscFunctionReturn(0); 972 } 973 974 /* Used for both MPIBAIJ and MPISBAIJ matrices */ 975 #define MatView_MPISBAIJ_Binary MatView_MPIBAIJ_Binary 976 977 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer) 978 { 979 PetscBool iascii,isdraw,issocket,isbinary; 980 981 PetscFunctionBegin; 982 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii)); 983 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw)); 984 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket)); 985 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary)); 986 if (iascii || isdraw || issocket) { 987 PetscCall(MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer)); 988 } else if (isbinary) PetscCall(MatView_MPISBAIJ_Binary(mat,viewer)); 989 PetscFunctionReturn(0); 990 } 991 992 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat) 993 { 994 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 995 996 PetscFunctionBegin; 997 #if defined(PETSC_USE_LOG) 998 PetscLogObjectState((PetscObject)mat,"Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT,mat->rmap->N,mat->cmap->N); 999 #endif 1000 PetscCall(MatStashDestroy_Private(&mat->stash)); 1001 PetscCall(MatStashDestroy_Private(&mat->bstash)); 1002 PetscCall(MatDestroy(&baij->A)); 1003 PetscCall(MatDestroy(&baij->B)); 1004 #if defined(PETSC_USE_CTABLE) 1005 PetscCall(PetscTableDestroy(&baij->colmap)); 1006 #else 1007 PetscCall(PetscFree(baij->colmap)); 1008 #endif 1009 PetscCall(PetscFree(baij->garray)); 
  PetscCall(VecDestroy(&baij->lvec));
  PetscCall(VecScatterDestroy(&baij->Mvctx));
  PetscCall(VecDestroy(&baij->slvec0));
  PetscCall(VecDestroy(&baij->slvec0b));
  PetscCall(VecDestroy(&baij->slvec1));
  PetscCall(VecDestroy(&baij->slvec1a));
  PetscCall(VecDestroy(&baij->slvec1b));
  PetscCall(VecScatterDestroy(&baij->sMvctx));
  PetscCall(PetscFree2(baij->rowvalues,baij->rowindices));
  PetscCall(PetscFree(baij->barray));
  PetscCall(PetscFree(baij->hd));
  PetscCall(VecDestroy(&baij->diag));
  PetscCall(VecDestroy(&baij->bb1));
  PetscCall(VecDestroy(&baij->xx1));
#if defined(PETSC_USE_REAL_MAT_SINGLE)
  PetscCall(PetscFree(baij->setvaluescopy));
#endif
  PetscCall(PetscFree(baij->in_loc));
  PetscCall(PetscFree(baij->v_loc));
  PetscCall(PetscFree(baij->rangebs));
  PetscCall(PetscFree(mat->data));

  /* remove the method hooks composed onto the object so the type can be changed cleanly */
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat,NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocationCSR_C",NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_elemental_C",NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_scalapack_C",NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpiaij_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpibaij_C",NULL));
  PetscFunctionReturn(0);
}

/*
  yy = A*xx for a Hermitian matrix stored in upper-triangular SBAIJ form:
  the diagonal block acts directly on xx, while the off-diagonal block B
  contributes both B*(scattered x) and B^H*x, combined through the sMvctx
  scatter into the work vectors slvec0/slvec1.
*/
PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
  PetscInt          mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar       *from;
  const PetscScalar *x;

  PetscFunctionBegin;
  /* diagonal part */
  PetscCall((*a->A->ops->mult)(a->A,xx,a->slvec1a));
  PetscCall(VecSet(a->slvec1b,0.0));

  /* subdiagonal part */
  PetscCheck(a->B->ops->multhermitiantranspose,PetscObjectComm((PetscObject)a->B),PETSC_ERR_SUP,"Not for type %s",((PetscObject)a->B)->type_name);
  PetscCall((*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b));

  /* copy x into the vec slvec0 */
  /* (slvec0b above presumably aliases the trailing part of slvec0, set up in
     MatSetUpMultiply_MPISBAIJ -- only the leading bs*mbs entries are copied here) */
  PetscCall(VecGetArray(a->slvec0,&from));
  PetscCall(VecGetArrayRead(xx,&x));

  PetscCall(PetscArraycpy(from,x,bs*mbs));
  PetscCall(VecRestoreArray(a->slvec0,&from));
  PetscCall(VecRestoreArrayRead(xx,&x));

  PetscCall(VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD));
  PetscCall(VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD));
  /* superdiagonal part */
  PetscCall((*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy));
  PetscFunctionReturn(0);
}

/*
  yy = A*xx for a symmetric matrix stored in upper-triangular SBAIJ form;
  identical to the Hermitian variant except B^T (not B^H) is applied.
*/
PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
  PetscInt          mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar       *from;
  const PetscScalar *x;

  PetscFunctionBegin;
  /* diagonal part */
  PetscCall((*a->A->ops->mult)(a->A,xx,a->slvec1a));
  PetscCall(VecSet(a->slvec1b,0.0));

  /* subdiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B,xx,a->slvec0b));

  /* copy x into the vec slvec0 */
  PetscCall(VecGetArray(a->slvec0,&from));
  PetscCall(VecGetArrayRead(xx,&x));

  PetscCall(PetscArraycpy(from,x,bs*mbs));
  PetscCall(VecRestoreArray(a->slvec0,&from));
  PetscCall(VecRestoreArrayRead(xx,&x));

  PetscCall(VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD));
  PetscCall(VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD));
  /* superdiagonal part */
  PetscCall((*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy));
  PetscFunctionReturn(0);
}

/* zz = yy + A*xx, Hermitian variant; same scatter pattern as MatMult_MPISBAIJ_Hermitian */
PetscErrorCode MatMultAdd_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
  PetscInt          mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar       *from,zero=0.0;
  const PetscScalar *x;

  PetscFunctionBegin;
  /* diagonal part */
  PetscCall((*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a));
  PetscCall(VecSet(a->slvec1b,zero));

  /* subdiagonal part */
  PetscCheck(a->B->ops->multhermitiantranspose,PetscObjectComm((PetscObject)a->B),PETSC_ERR_SUP,"Not for type %s",((PetscObject)a->B)->type_name);
  PetscCall((*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b));

  /* copy x into the vec slvec0 */
  PetscCall(VecGetArray(a->slvec0,&from));
  PetscCall(VecGetArrayRead(xx,&x));
  PetscCall(PetscArraycpy(from,x,bs*mbs));
  PetscCall(VecRestoreArray(a->slvec0,&from));

  /* the read array on xx is restored between scatter begin and end, overlapping a little work */
  PetscCall(VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD));
  PetscCall(VecRestoreArrayRead(xx,&x));
  PetscCall(VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD));

  /* superdiagonal part */
  PetscCall((*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz));
  PetscFunctionReturn(0);
}

/* zz = yy + A*xx, symmetric (transpose) variant */
PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
  PetscInt          mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar       *from,zero=0.0;
  const PetscScalar *x;

  PetscFunctionBegin;
  /* diagonal part */
  PetscCall((*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a));
  PetscCall(VecSet(a->slvec1b,zero));

  /* subdiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B,xx,a->slvec0b));

  /* copy x into the vec slvec0 */
  PetscCall(VecGetArray(a->slvec0,&from));
  PetscCall(VecGetArrayRead(xx,&x));
  PetscCall(PetscArraycpy(from,x,bs*mbs));
  PetscCall(VecRestoreArray(a->slvec0,&from));

  PetscCall(VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD));
  PetscCall(VecRestoreArrayRead(xx,&x));
  PetscCall(VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD));

  /* superdiagonal part */
  PetscCall((*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz));
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;

  PetscFunctionBegin;
  /* PetscCheck(a->rmap->N == a->cmap->N,PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
  PetscCall(MatGetDiagonal(a->A,v));
  PetscFunctionReturn(0);
}

/* Scale both the diagonal (A) and off-diagonal (B) local blocks by aa */
PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;

  PetscFunctionBegin;
  PetscCall(MatScale(a->A,aa));
  PetscCall(MatScale(a->B,aa));
  PetscFunctionReturn(0);
}

/*
  Return one locally owned row (global index 'row') as global column indices
  and values, merging the diagonal-block (A) and off-diagonal-block (B)
  entries into a single list sorted by increasing global column.
  The returned arrays live in mat->rowvalues/rowindices until MatRestoreRow.
*/
PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data;
  PetscScalar  *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscInt     bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt     nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
  PetscInt     *cmap,*idx_p,cstart = mat->rstartbs;

  PetscFunctionBegin;
  PetscCheck(!mat->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
    Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt     max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
      if (max < tmp) max = tmp;
    }
    /* max block-rows times bs*bs scalar entries per block */
    PetscCall(PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices));
  }

  PetscCheck(row >= brstart && row < brend,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
  lrow = row - brstart;  /* local row index */

  /* pass NULL for whichever of values/indices the caller did not request */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = NULL; pvB = NULL;}
  if (!idx) {pcA = NULL; if (!v) pcB = NULL;}
  PetscCall((*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB));
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      /* imark = number of B entries whose global block column lies before the
         diagonal block; layout is [B-left | A | B-right] */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          /* values were not requested, so imark must be recomputed here */
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
      }
    } else {
      if (idx) *idx = NULL;
      if (v)   *v   = NULL;
    }
  }
  *nz = nztot;
  PetscCall((*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB));
  PetscFunctionReturn(0);
}

/* Clear the active-row flag set by MatGetRow_MPISBAIJ (buffers are reused, not freed) */
PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;

  PetscFunctionBegin;
  PetscCheck(baij->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/* Allow MatGetRow() on the local SBAIJ block to return upper-triangular entries */
PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;

  PetscFunctionBegin;
  aA->getrow_utriangular = PETSC_TRUE;
  PetscFunctionReturn(0);
}
PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;

  PetscFunctionBegin;
  aA->getrow_utriangular = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/* Complex-conjugate all stored values; a no-op for real scalars */
PetscErrorCode MatConjugate_MPISBAIJ(Mat mat)
{
  PetscFunctionBegin;
  if (PetscDefined(USE_COMPLEX)) {
    Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)mat->data;

    PetscCall(MatConjugate(a->A));
    PetscCall(MatConjugate(a->B));
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;

  PetscFunctionBegin;
  PetscCall(MatRealPart(a->A));
  PetscCall(MatRealPart(a->B));
  PetscFunctionReturn(0);
}

PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;

  PetscFunctionBegin;
  PetscCall(MatImaginaryPart(a->A));
  PetscCall(MatImaginaryPart(a->B));
  PetscFunctionReturn(0);
}

/* Check if isrow is a subset of iscol_local, called by MatCreateSubMatrix_MPISBAIJ()
   Input: isrow - distributed(parallel),
          iscol_local - locally owned (seq)
*/
PetscErrorCode ISEqual_private(IS isrow,IS iscol_local,PetscBool *flg)
{
  PetscInt       sz1,sz2,*a1,*a2,i,j,k,nmatch;
  const PetscInt *ptr1,*ptr2;

  PetscFunctionBegin;
  PetscCall(ISGetLocalSize(isrow,&sz1));
  PetscCall(ISGetLocalSize(iscol_local,&sz2));
  if (sz1 > sz2) {
    *flg = PETSC_FALSE;
    PetscFunctionReturn(0);
  }

  PetscCall(ISGetIndices(isrow,&ptr1));
  PetscCall(ISGetIndices(iscol_local,&ptr2));

  /* compare sorted copies so the original index sets are left untouched */
  PetscCall(PetscMalloc1(sz1,&a1));
  PetscCall(PetscMalloc1(sz2,&a2));
  PetscCall(PetscArraycpy(a1,ptr1,sz1));
  PetscCall(PetscArraycpy(a2,ptr2,sz2));
  PetscCall(PetscSortInt(sz1,a1));
  PetscCall(PetscSortInt(sz2,a2));

  /* merge-style scan: each a1[i] is searched for from the last match position k */
  nmatch=0;
  k = 0;
  for (i=0; i<sz1; i++) {
    for (j=k; j<sz2; j++) {
      if (a1[i] == a2[j]) {
        k = j; nmatch++;
        break;
      }
    }
  }
  PetscCall(ISRestoreIndices(isrow,&ptr1));
  PetscCall(ISRestoreIndices(iscol_local,&ptr2));
  PetscCall(PetscFree(a1));
  PetscCall(PetscFree(a2));
  if (nmatch < sz1) {
    *flg = PETSC_FALSE;
  } else {
    *flg = PETSC_TRUE;
  }
  PetscFunctionReturn(0);
}

/*
  Extract a parallel submatrix; for the symmetric format iscol must equal
  isrow (and be sorted) so the result is again symmetric.  The gathered
  column IS is cached on the new matrix for MAT_REUSE_MATRIX calls.
*/
PetscErrorCode MatCreateSubMatrix_MPISBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
{
  IS        iscol_local;
  PetscInt  csize;
  PetscBool isequal;

  PetscFunctionBegin;
  PetscCall(ISGetLocalSize(iscol,&csize));
  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local));
    PetscCheck(iscol_local,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
  } else {
    PetscBool issorted;

    PetscCall(ISAllGather(iscol,&iscol_local));
    PetscCall(ISEqual_private(isrow,iscol_local,&isequal));
    PetscCall(ISSorted(iscol_local, &issorted));
    PetscCheck(isequal && issorted,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isrow and be sorted");
  }

  /* now call MatCreateSubMatrix_MPIBAIJ() */
  PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat));
  if (call == MAT_INITIAL_MATRIX) {
    /* compose keeps a reference, so the local destroy is safe */
    PetscCall(PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local));
    PetscCall(ISDestroy(&iscol_local));
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data;

  PetscFunctionBegin;
  PetscCall(MatZeroEntries(l->A));
  PetscCall(MatZeroEntries(l->B));
  PetscFunctionReturn(0);
}

/* Combine the A and B block statistics, reducing across ranks for the GLOBAL flags */
PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
  Mat            A = a->A,B = a->B;
  PetscLogDouble isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = (PetscReal)matin->rmap->bs;

  PetscCall(MatGetInfo(A,MAT_LOCAL,info));

  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;

  PetscCall(MatGetInfo(B,MAT_LOCAL,info));

  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

/*
  Forward options to the local blocks; MAT_HERMITIAN/MAT_SYMMETRIC also swap
  the matrix-vector kernels (complex builds only) between the Hermitian and
  transpose-based implementations.
*/
PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_SUBMAT_SINGLEIS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    MatCheckPreallocated(A,1);
    PetscCall(MatSetOption(a->A,op,flg));
    PetscCall(MatSetOption(a->B,op,flg));
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A,1);
    a->roworiented = flg;

    PetscCall(MatSetOption(a->A,op,flg));
    PetscCall(MatSetOption(a->B,op,flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_SORTED_FULL:
    PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op]));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    a->ht_flag = flg;
    break;
  case MAT_HERMITIAN:
    MatCheckPreallocated(A,1);
    PetscCall(MatSetOption(a->A,op,flg));
#if defined(PETSC_USE_COMPLEX)
    if (flg) { /* need different mat-vec ops */
      A->ops->mult             = MatMult_MPISBAIJ_Hermitian;
      A->ops->multadd          = MatMultAdd_MPISBAIJ_Hermitian;
      A->ops->multtranspose    = NULL;
      A->ops->multtransposeadd = NULL;
      A->symmetric = PETSC_FALSE;
    }
#endif
    break;
  case MAT_SPD:
  case MAT_SYMMETRIC:
    MatCheckPreallocated(A,1);
    PetscCall(MatSetOption(a->A,op,flg));
#if defined(PETSC_USE_COMPLEX)
    if (flg) { /* restore to use default mat-vec ops */
      A->ops->mult             = MatMult_MPISBAIJ;
      A->ops->multadd          = MatMultAdd_MPISBAIJ;
      A->ops->multtranspose    = MatMult_MPISBAIJ;
      A->ops->multtransposeadd = MatMultAdd_MPISBAIJ;
    }
#endif
    break;
  case MAT_STRUCTURALLY_SYMMETRIC:
    MatCheckPreallocated(A,1);
    PetscCall(MatSetOption(a->A,op,flg));
    break;
  case MAT_SYMMETRY_ETERNAL:
    PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
    PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op]));
    break;
  case MAT_IGNORE_LOWER_TRIANGULAR:
    aA->ignore_ltriangular = flg;
    break;
  case MAT_ERROR_LOWER_TRIANGULAR:
    /* NOTE(review): this also writes ignore_ltriangular, same as the case above --
       looks intentional in this code base but verify against MatSetOption docs */
    aA->ignore_ltriangular = flg;
    break;
  case MAT_GETROW_UPPERTRIANGULAR:
    aA->getrow_utriangular = flg;
    break;
  default:
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}

/* Transpose of a symmetric matrix is itself: duplicate or copy */
PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
{
  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscCall(MatDuplicate(A,MAT_COPY_VALUES,B));
  } else if (reuse == MAT_REUSE_MATRIX) {
    PetscCall(MatCopy(A,*B,SAME_NONZERO_PATTERN));
  }
  PetscFunctionReturn(0);
}

/*
  Symmetric diagonal scaling D*A*D; symmetry requires the left and right
  scaling vectors to be identical.
*/
PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
  Mat          a = baij->A, b=baij->B;
  PetscInt     nv,m,n;
  PetscBool    flg;

  PetscFunctionBegin;
  if (ll != rr) {
    PetscCall(VecEqual(ll,rr,&flg));
    PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same");
  }
  if (!ll) PetscFunctionReturn(0);

  PetscCall(MatGetLocalSize(mat,&m,&n));
  PetscCheck(m == n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local size %" PetscInt_FMT " %" PetscInt_FMT " must be same",m,n);

  PetscCall(VecGetLocalSize(rr,&nv));
  PetscCheck(nv==n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");

  /* start communicating the off-process entries of rr needed by the B block */
  PetscCall(VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD));

  /* left diagonalscale the off-diagonal part */
  PetscCall((*b->ops->diagonalscale)(b,ll,NULL));

  /* scale the diagonal part */
  PetscCall((*a->ops->diagonalscale)(a,ll,rr));

  /* right diagonalscale the off-diagonal part */
  PetscCall(VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD));
  PetscCall((*b->ops->diagonalscale)(b,NULL,baij->lvec));
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;

  PetscFunctionBegin;
  PetscCall(MatSetUnfactored(a->A));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*);

/* Matrices are equal iff both local blocks agree on every rank (MPI_LAND reduction) */
PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool *flag)
{
  Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
  Mat          a,b,c,d;
  PetscBool    flg;

  PetscFunctionBegin;
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  PetscCall(MatEqual(a,c,&flg));
  if (flg) {
    PetscCall(MatEqual(b,d,&flg));
  }
  PetscCall(MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A)));
  PetscFunctionReturn(0);
}

PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
{
  PetscBool isbaij;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompareAny((PetscObject)B,&isbaij,MATSEQSBAIJ,MATMPISBAIJ,""));
  PetscCheck(isbaij,PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"Not for matrix type %s",((PetscObject)B)->type_name);
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    PetscCall(MatGetRowUpperTriangular(A));
    PetscCall(MatCopy_Basic(A,B,str));
    PetscCall(MatRestoreRowUpperTriangular(A));
  } else {
    Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
    Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)B->data;

    PetscCall(MatCopy(a->A,b->A,str));
    PetscCall(MatCopy(a->B,b->B,str));
  }
  PetscCall(PetscObjectStateIncrease((PetscObject)B));
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetUp_MPISBAIJ(Mat A)
{
  PetscFunctionBegin;
  PetscCall(MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL));
  PetscFunctionReturn(0);
}

/*
  Y = a*X + Y.  SAME_NONZERO_PATTERN uses a raw BLAS axpy on the value
  arrays; SUBSET falls back to MatAXPY_Basic; otherwise a new matrix with
  the merged pattern is preallocated and header-merged into Y.
*/
PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  Mat_MPISBAIJ *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data;
  PetscBLASInt bnz,one=1;
  Mat_SeqSBAIJ *xa,*ya;
  Mat_SeqBAIJ  *xb,*yb;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    xa = (Mat_SeqSBAIJ*)xx->A->data;
    ya = (Mat_SeqSBAIJ*)yy->A->data;
    PetscCall(PetscBLASIntCast(xa->nz,&bnz));
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one));
    xb = (Mat_SeqBAIJ*)xx->B->data;
    yb = (Mat_SeqBAIJ*)yy->B->data;
    PetscCall(PetscBLASIntCast(xb->nz,&bnz));
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one));
    PetscCall(PetscObjectStateIncrease((PetscObject)Y));
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    PetscCall(MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE));
    PetscCall(MatAXPY_Basic(Y,a,X,str));
    PetscCall(MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE));
  } else {
    Mat      B;
    PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs;
    PetscCheck(bs == X->rmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
    PetscCall(MatGetRowUpperTriangular(X));
    PetscCall(MatGetRowUpperTriangular(Y));
    PetscCall(PetscMalloc1(yy->A->rmap->N,&nnz_d));
    PetscCall(PetscMalloc1(yy->B->rmap->N,&nnz_o));
    PetscCall(MatCreate(PetscObjectComm((PetscObject)Y),&B));
    PetscCall(PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name));
    PetscCall(MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N));
    PetscCall(MatSetBlockSizesFromMats(B,Y,Y));
    PetscCall(MatSetType(B,MATMPISBAIJ));
    PetscCall(MatAXPYGetPreallocation_SeqSBAIJ(yy->A,xx->A,nnz_d));
    PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o));
    PetscCall(MatMPISBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o));
    PetscCall(MatAXPY_BasicWithPreallocation(B,Y,a,X,str));
    /* B's header replaces Y's; Y keeps its identity for callers */
    PetscCall(MatHeaderMerge(Y,&B));
    PetscCall(PetscFree(nnz_d));
    PetscCall(PetscFree(nnz_o));
    PetscCall(MatRestoreRowUpperTriangular(X));
    PetscCall(MatRestoreRowUpperTriangular(Y));
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatCreateSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
{
  PetscInt  i;
  PetscBool flg;

  PetscFunctionBegin;
  PetscCall(MatCreateSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B)); /* B[] are sbaij matrices */
  for (i=0; i<n; i++) {
    PetscCall(ISEqual(irow[i],icol[i],&flg));
    if (!flg) {
      /* non-symmetric index sets break the SBAIJ assumptions; disable its ops */
      PetscCall(MatSeqSBAIJZeroOps_Private(*B[i]));
    }
  }
  PetscFunctionReturn(0);
}

/* Y = Y + a*I, (re)preallocating a diagonal if the matrix has no entries yet */
PetscErrorCode MatShift_MPISBAIJ(Mat Y,PetscScalar a)
{
  Mat_MPISBAIJ *maij = (Mat_MPISBAIJ*)Y->data;
  Mat_SeqSBAIJ *aij  = (Mat_SeqSBAIJ*)maij->A->data;

  PetscFunctionBegin;
  if (!Y->preallocated) {
    PetscCall(MatMPISBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL));
  } else if (!aij->nz) {
    PetscInt nonew = aij->nonew;
    PetscCall(MatSeqSBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL));
    aij->nonew = nonew; /* preserve the caller's new-nonzero policy */
  }
  PetscCall(MatShift_Basic(Y,a));
  PetscFunctionReturn(0);
}

PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A,PetscBool *missing,PetscInt *d)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;

  PetscFunctionBegin;
  PetscCheck(A->rmap->n == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
  PetscCall(MatMissingDiagonal(a->A,missing,d));
  if (d) {
    PetscInt rstart;
    PetscCall(MatGetOwnershipRange(A,&rstart,NULL));
    /* convert the local block-row index to a global block-row index */
    *d += rstart/A->rmap->bs;

  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a)
{
  PetscFunctionBegin;
  *a = ((Mat_MPISBAIJ*)A->data)->A;
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
/* Function table for the MATMPISBAIJ type; slot numbers follow struct _MatOps */
static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ,
                                       MatGetRow_MPISBAIJ,
                                       MatRestoreRow_MPISBAIJ,
                                       MatMult_MPISBAIJ,
                               /* 4*/  MatMultAdd_MPISBAIJ,
                                       MatMult_MPISBAIJ,       /* transpose versions are same as non-transpose */
                                       MatMultAdd_MPISBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_MPISBAIJ,
                                       MatTranspose_MPISBAIJ,
                               /* 15*/ MatGetInfo_MPISBAIJ,
                                       MatEqual_MPISBAIJ,
                                       MatGetDiagonal_MPISBAIJ,
                                       MatDiagonalScale_MPISBAIJ,
                                       MatNorm_MPISBAIJ,
                               /* 20*/ MatAssemblyBegin_MPISBAIJ,
                                       MatAssemblyEnd_MPISBAIJ,
                                       MatSetOption_MPISBAIJ,
                                       MatZeroEntries_MPISBAIJ,
                               /* 24*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 29*/ MatSetUp_MPISBAIJ,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPISBAIJ,
                                       NULL,
                               /* 34*/ MatDuplicate_MPISBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 39*/ MatAXPY_MPISBAIJ,
                                       MatCreateSubMatrices_MPISBAIJ,
                                       MatIncreaseOverlap_MPISBAIJ,
                                       MatGetValues_MPISBAIJ,
                                       MatCopy_MPISBAIJ,
                               /* 44*/ NULL,
                                       MatScale_MPISBAIJ,
                                       MatShift_MPISBAIJ,
                                       NULL,
                                       NULL,
                               /* 49*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 54*/ NULL,
                                       NULL,
                                       MatSetUnfactored_MPISBAIJ,
                                       NULL,
                                       MatSetValuesBlocked_MPISBAIJ,
                               /* 59*/ MatCreateSubMatrix_MPISBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 64*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 69*/ MatGetRowMaxAbs_MPISBAIJ,
                                       NULL,
                                       MatConvert_MPISBAIJ_Basic,
                                       NULL,
                                       NULL,
                               /* 74*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 79*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatLoad_MPISBAIJ,
                               /* 84*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 89*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 94*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 99*/ NULL,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPISBAIJ,
                                       NULL,
                               /*104*/ NULL,
                                       MatRealPart_MPISBAIJ,
                                       MatImaginaryPart_MPISBAIJ,
                                       MatGetRowUpperTriangular_MPISBAIJ,
                                       MatRestoreRowUpperTriangular_MPISBAIJ,
                               /*109*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatMissingDiagonal_MPISBAIJ,
                               /*114*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /*119*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /*124*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /*129*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /*139*/ MatSetBlockSizes_Default,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /*144*/MatCreateMPIMatConcatenateSeqMat_MPISBAIJ,
                                       NULL,
                                       NULL,
                                       NULL
};

/*
  Preallocate the diagonal (SeqSBAIJ) and off-diagonal (SeqBAIJ) local blocks:
  bs block size; d_nz/o_nz uniform block-row counts, or d_nnz/o_nnz per-block-row
  counts (either array may be NULL).
*/
PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
{
Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)B->data; 1914 PetscInt i,mbs,Mbs; 1915 PetscMPIInt size; 1916 1917 PetscFunctionBegin; 1918 PetscCall(MatSetBlockSize(B,PetscAbs(bs))); 1919 PetscCall(PetscLayoutSetUp(B->rmap)); 1920 PetscCall(PetscLayoutSetUp(B->cmap)); 1921 PetscCall(PetscLayoutGetBlockSize(B->rmap,&bs)); 1922 PetscCheck(B->rmap->N <= B->cmap->N,PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"MPISBAIJ matrix cannot have more rows %" PetscInt_FMT " than columns %" PetscInt_FMT,B->rmap->N,B->cmap->N); 1923 PetscCheck(B->rmap->n <= B->cmap->n,PETSC_COMM_SELF,PETSC_ERR_SUP,"MPISBAIJ matrix cannot have more local rows %" PetscInt_FMT " than columns %" PetscInt_FMT,B->rmap->n,B->cmap->n); 1924 1925 mbs = B->rmap->n/bs; 1926 Mbs = B->rmap->N/bs; 1927 PetscCheck(mbs*bs == B->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT,B->rmap->N,bs); 1928 1929 B->rmap->bs = bs; 1930 b->bs2 = bs*bs; 1931 b->mbs = mbs; 1932 b->Mbs = Mbs; 1933 b->nbs = B->cmap->n/bs; 1934 b->Nbs = B->cmap->N/bs; 1935 1936 for (i=0; i<=b->size; i++) { 1937 b->rangebs[i] = B->rmap->range[i]/bs; 1938 } 1939 b->rstartbs = B->rmap->rstart/bs; 1940 b->rendbs = B->rmap->rend/bs; 1941 1942 b->cstartbs = B->cmap->rstart/bs; 1943 b->cendbs = B->cmap->rend/bs; 1944 1945 #if defined(PETSC_USE_CTABLE) 1946 PetscCall(PetscTableDestroy(&b->colmap)); 1947 #else 1948 PetscCall(PetscFree(b->colmap)); 1949 #endif 1950 PetscCall(PetscFree(b->garray)); 1951 PetscCall(VecDestroy(&b->lvec)); 1952 PetscCall(VecScatterDestroy(&b->Mvctx)); 1953 PetscCall(VecDestroy(&b->slvec0)); 1954 PetscCall(VecDestroy(&b->slvec0b)); 1955 PetscCall(VecDestroy(&b->slvec1)); 1956 PetscCall(VecDestroy(&b->slvec1a)); 1957 PetscCall(VecDestroy(&b->slvec1b)); 1958 PetscCall(VecScatterDestroy(&b->sMvctx)); 1959 1960 /* Because the B will have been resized we simply destroy it and create a new one each time */ 1961 
PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&size)); 1962 PetscCall(MatDestroy(&b->B)); 1963 PetscCall(MatCreate(PETSC_COMM_SELF,&b->B)); 1964 PetscCall(MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0)); 1965 PetscCall(MatSetType(b->B,MATSEQBAIJ)); 1966 PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)b->B)); 1967 1968 if (!B->preallocated) { 1969 PetscCall(MatCreate(PETSC_COMM_SELF,&b->A)); 1970 PetscCall(MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n)); 1971 PetscCall(MatSetType(b->A,MATSEQSBAIJ)); 1972 PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)b->A)); 1973 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash)); 1974 } 1975 1976 PetscCall(MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz)); 1977 PetscCall(MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz)); 1978 1979 B->preallocated = PETSC_TRUE; 1980 B->was_assembled = PETSC_FALSE; 1981 B->assembled = PETSC_FALSE; 1982 PetscFunctionReturn(0); 1983 } 1984 1985 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 1986 { 1987 PetscInt m,rstart,cend; 1988 PetscInt i,j,d,nz,bd, nz_max=0,*d_nnz=NULL,*o_nnz=NULL; 1989 const PetscInt *JJ =NULL; 1990 PetscScalar *values=NULL; 1991 PetscBool roworiented = ((Mat_MPISBAIJ*)B->data)->roworiented; 1992 PetscBool nooffprocentries; 1993 1994 PetscFunctionBegin; 1995 PetscCheck(bs >= 1,PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %" PetscInt_FMT,bs); 1996 PetscCall(PetscLayoutSetBlockSize(B->rmap,bs)); 1997 PetscCall(PetscLayoutSetBlockSize(B->cmap,bs)); 1998 PetscCall(PetscLayoutSetUp(B->rmap)); 1999 PetscCall(PetscLayoutSetUp(B->cmap)); 2000 PetscCall(PetscLayoutGetBlockSize(B->rmap,&bs)); 2001 m = B->rmap->n/bs; 2002 rstart = B->rmap->rstart/bs; 2003 cend = B->cmap->rend/bs; 2004 2005 
  PetscCheck(!ii[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %" PetscInt_FMT,ii[0]);
  PetscCall(PetscMalloc2(m,&d_nnz,m,&o_nnz));
  for (i=0; i<m; i++) {
    nz = ii[i+1] - ii[i];
    PetscCheck(nz >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT,i,nz);
    /* count the ones on the diagonal and above, split into diagonal and off diagonal portions. */
    JJ = jj + ii[i];
    bd = 0;
    /* bd: leading entries strictly below the diagonal (block col < block row); these are
       discarded by SBAIJ storage and excluded from both preallocation counts.
       NOTE(review): this split assumes each row's column indices are sorted ascending — confirm against callers */
    for (j=0; j<nz; j++) {
      if (*JJ >= i + rstart) break;
      JJ++;
      bd++;
    }
    d = 0;
    /* d: entries on/above the diagonal that fall inside this process's column range [rstart, cend) */
    for (; j<nz; j++) {
      if (*JJ++ >= cend) break;
      d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nz - d - bd;          /* remainder: upper-triangular entries owned by other processes */
    nz       = nz - bd;              /* row length after dropping the lower-triangular part */
    nz_max   = PetscMax(nz_max,nz);  /* longest kept row; sizes the zero buffer used when V == NULL */
  }
  PetscCall(MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz));
  PetscCall(MatSetOption(B,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE));
  PetscCall(PetscFree2(d_nnz,o_nnz));

  values = (PetscScalar*)V;
  if (!values) {
    /* no values supplied: insert explicit zeros so the nonzero pattern is still created */
    PetscCall(PetscCalloc1(bs*bs*nz_max,&values));
  }
  for (i=0; i<m; i++) {
    PetscInt          row   = i + rstart;
    PetscInt          ncols = ii[i+1] - ii[i];
    const PetscInt    *icols = jj + ii[i];
    if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
      const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
      PetscCall(MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES));
    } else { /* block ordering does not match so we can only insert one block at a time. */
      PetscInt j;
      for (j=0; j<ncols; j++) {
        const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
        PetscCall(MatSetValuesBlocked_MPISBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES));
      }
    }
  }

  if (!V) PetscCall(PetscFree(values));
  /* every entry above is locally owned, so skip the off-process communication phase of assembly */
  nooffprocentries    = B->nooffprocentries;
  B->nooffprocentries = PETSC_TRUE;
  PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
  B->nooffprocentries = nooffprocentries;

  PetscCall(MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
  PetscFunctionReturn(0);
}

/*MC
   MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
   based on block compressed sparse row format.  Only the upper triangular portion of the "diagonal" portion of
   the matrix is stored.

   For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you
   can call MatSetOption(Mat, MAT_HERMITIAN);

   Options Database Keys:
. -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()

   Notes:
   The number of rows in the matrix must be less than or equal to the number of columns. Similarly the number of rows in the
   diagonal portion of the matrix of each process has to be less than or equal the number of columns.
2077 2078 Level: beginner 2079 2080 .seealso: `MatCreateBAIJ()`, `MATSEQSBAIJ`, `MatType` 2081 M*/ 2082 2083 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B) 2084 { 2085 Mat_MPISBAIJ *b; 2086 PetscBool flg = PETSC_FALSE; 2087 2088 PetscFunctionBegin; 2089 PetscCall(PetscNewLog(B,&b)); 2090 B->data = (void*)b; 2091 PetscCall(PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps))); 2092 2093 B->ops->destroy = MatDestroy_MPISBAIJ; 2094 B->ops->view = MatView_MPISBAIJ; 2095 B->assembled = PETSC_FALSE; 2096 B->insertmode = NOT_SET_VALUES; 2097 2098 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank)); 2099 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size)); 2100 2101 /* build local table of row and column ownerships */ 2102 PetscCall(PetscMalloc1(b->size+2,&b->rangebs)); 2103 2104 /* build cache for off array entries formed */ 2105 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash)); 2106 2107 b->donotstash = PETSC_FALSE; 2108 b->colmap = NULL; 2109 b->garray = NULL; 2110 b->roworiented = PETSC_TRUE; 2111 2112 /* stuff used in block assembly */ 2113 b->barray = NULL; 2114 2115 /* stuff used for matrix vector multiply */ 2116 b->lvec = NULL; 2117 b->Mvctx = NULL; 2118 b->slvec0 = NULL; 2119 b->slvec0b = NULL; 2120 b->slvec1 = NULL; 2121 b->slvec1a = NULL; 2122 b->slvec1b = NULL; 2123 b->sMvctx = NULL; 2124 2125 /* stuff for MatGetRow() */ 2126 b->rowindices = NULL; 2127 b->rowvalues = NULL; 2128 b->getrowactive = PETSC_FALSE; 2129 2130 /* hash table stuff */ 2131 b->ht = NULL; 2132 b->hd = NULL; 2133 b->ht_size = 0; 2134 b->ht_flag = PETSC_FALSE; 2135 b->ht_fact = 0; 2136 b->ht_total_ct = 0; 2137 b->ht_insert_ct = 0; 2138 2139 /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */ 2140 b->ijonly = PETSC_FALSE; 2141 2142 b->in_loc = NULL; 2143 b->v_loc = NULL; 2144 b->n_loc = 0; 2145 2146 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ)); 2147 
  /* Register type-specific methods and conversions so they can be looked up by name at runtime */
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_elemental_C",MatConvert_MPISBAIJ_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_scalapack_C",MatConvert_SBAIJ_ScaLAPACK));
#endif
  /* both AIJ and BAIJ conversions go through the same generic row-by-row routine */
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpiaij_C",MatConvert_MPISBAIJ_Basic));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpibaij_C",MatConvert_MPISBAIJ_Basic));

  /* SBAIJ is symmetric by construction and that can never change for this type */
  B->symmetric                  = PETSC_TRUE;
  B->structurally_symmetric     = PETSC_TRUE;
  B->symmetric_set              = PETSC_TRUE;
  B->structurally_symmetric_set = PETSC_TRUE;
  B->symmetric_eternal          = PETSC_TRUE;
#if defined(PETSC_USE_COMPLEX)
  /* complex scalars: symmetric does not imply Hermitian; user must opt in via MatSetOption(MAT_HERMITIAN) */
  B->hermitian     = PETSC_FALSE;
  B->hermitian_set = PETSC_FALSE;
#else
  /* real scalars: symmetric and Hermitian coincide */
  B->hermitian     = PETSC_TRUE;
  B->hermitian_set = PETSC_TRUE;
#endif

  PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ));
  PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix 1","Mat");
  PetscCall(PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",flg,&flg,NULL));
  if (flg) {
    PetscReal fact = 1.39;
    PetscCall(MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE));
    /* NOTE(review): the same option key is queried a second time here as a Real to pick up
       "-mat_use_hash_table <fact>"; relies on the options database tolerating both reads — confirm intended */
    PetscCall(PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL));
    if (fact <= 1.0) fact = 1.39;  /* guard against nonsensical factors; fall back to the default */
PetscCall(MatMPIBAIJSetHashTableFactor(B,fact)); 2181 PetscCall(PetscInfo(B,"Hash table Factor used %5.2g\n",(double)fact)); 2182 } 2183 PetscOptionsEnd(); 2184 PetscFunctionReturn(0); 2185 } 2186 2187 /*MC 2188 MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices. 2189 2190 This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator, 2191 and MATMPISBAIJ otherwise. 2192 2193 Options Database Keys: 2194 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions() 2195 2196 Level: beginner 2197 2198 .seealso: `MatCreateSBAIJ`, `MATSEQSBAIJ`, `MATMPISBAIJ` 2199 M*/ 2200 2201 /*@C 2202 MatMPISBAIJSetPreallocation - For good matrix assembly performance 2203 the user should preallocate the matrix storage by setting the parameters 2204 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2205 performance can be increased by more than a factor of 50. 2206 2207 Collective on Mat 2208 2209 Input Parameters: 2210 + B - the matrix 2211 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 2212 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 2213 . d_nz - number of block nonzeros per block row in diagonal portion of local 2214 submatrix (same for all local rows) 2215 . d_nnz - array containing the number of block nonzeros in the various block rows 2216 in the upper triangular and diagonal part of the in diagonal portion of the local 2217 (possibly different for each block row) or NULL. If you plan to factor the matrix you must leave room 2218 for the diagonal entry and set a value even if it is zero. 2219 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2220 submatrix (same for all local rows). 
-  o_nnz - array containing the number of nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix that is right of the diagonal
           (possibly different for each block row) or NULL.

   Options Database Keys:
+   -mat_no_unroll - uses code that does not unroll the loops in the
                     block calculations (much slower)
-   -mat_block_size - size of the blocks to use

   Notes:

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   If the *_nnz parameter is given then the *_nz parameter is ignored

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both). Set
   d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
   memory allocation. Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   You can call MatGetInfo() to get information on how effective the preallocation was;
   for example the fields mallocs, nz_allocated, nz_used, nz_unneeded.
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          --------------------------
   row 3  |. . . d d d o o o o  o  o
   row 4  |. . . d d d o o o o  o  o
   row 5  |. .
. d d d o o o o o o 2263 -------------------------- 2264 .ve 2265 2266 Thus, any entries in the d locations are stored in the d (diagonal) 2267 submatrix, and any entries in the o locations are stored in the 2268 o (off-diagonal) submatrix. Note that the d matrix is stored in 2269 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 2270 2271 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 2272 plus the diagonal part of the d matrix, 2273 and o_nz should indicate the number of block nonzeros per row in the o matrix 2274 2275 In general, for PDE problems in which most nonzeros are near the diagonal, 2276 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2277 or you will get TERRIBLE performance; see the users' manual chapter on 2278 matrices. 2279 2280 Level: intermediate 2281 2282 .seealso: `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `PetscSplitOwnership()` 2283 @*/ 2284 PetscErrorCode MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2285 { 2286 PetscFunctionBegin; 2287 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2288 PetscValidType(B,1); 2289 PetscValidLogicalCollectiveInt(B,bs,2); 2290 PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz)); 2291 PetscFunctionReturn(0); 2292 } 2293 2294 /*@C 2295 MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format 2296 (block compressed row). For good matrix assembly performance 2297 the user should preallocate the matrix storage by setting the parameters 2298 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2299 performance can be increased by more than a factor of 50. 2300 2301 Collective 2302 2303 Input Parameters: 2304 + comm - MPI communicator 2305 . bs - size of block, the blocks are ALWAYS square. 
One can use MatSetBlockSizes() to set a different row and column blocksize but the row 2306 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 2307 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2308 This value should be the same as the local size used in creating the 2309 y vector for the matrix-vector product y = Ax. 2310 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 2311 This value should be the same as the local size used in creating the 2312 x vector for the matrix-vector product y = Ax. 2313 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2314 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2315 . d_nz - number of block nonzeros per block row in diagonal portion of local 2316 submatrix (same for all local rows) 2317 . d_nnz - array containing the number of block nonzeros in the various block rows 2318 in the upper triangular portion of the in diagonal portion of the local 2319 (possibly different for each block block row) or NULL. 2320 If you plan to factor the matrix you must leave room for the diagonal entry and 2321 set its value even if it is zero. 2322 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2323 submatrix (same for all local rows). 2324 - o_nnz - array containing the number of nonzeros in the various block rows of the 2325 off-diagonal portion of the local submatrix (possibly different for 2326 each block row) or NULL. 2327 2328 Output Parameter: 2329 . A - the matrix 2330 2331 Options Database Keys: 2332 + -mat_no_unroll - uses code that does not unroll the loops in the 2333 block calculations (much slower) 2334 . 
-mat_block_size - size of the blocks to use 2335 - -mat_mpi - use the parallel matrix data structures even on one processor 2336 (defaults to using SeqBAIJ format on one processor) 2337 2338 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 2339 MatXXXXSetPreallocation() paradigm instead of this routine directly. 2340 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 2341 2342 Notes: 2343 The number of rows and columns must be divisible by blocksize. 2344 This matrix type does not support complex Hermitian operation. 2345 2346 The user MUST specify either the local or global matrix dimensions 2347 (possibly both). 2348 2349 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2350 than it must be used on all processors that share the object for that argument. 2351 2352 If the *_nnz parameter is given then the *_nz parameter is ignored 2353 2354 Storage Information: 2355 For a square global matrix we define each processor's diagonal portion 2356 to be its local rows and the corresponding columns (a square submatrix); 2357 each processor's off-diagonal portion encompasses the remainder of the 2358 local matrix (a rectangular submatrix). 2359 2360 The user can specify preallocated storage for the diagonal part of 2361 the local submatrix with either d_nz or d_nnz (not both). Set 2362 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 2363 memory allocation. Likewise, specify preallocated storage for the 2364 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2365 2366 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2367 the figure below we depict these three local rows and all columns (0-11). 2368 2369 .vb 2370 0 1 2 3 4 5 6 7 8 9 10 11 2371 -------------------------- 2372 row 3 |. . . d d d o o o o o o 2373 row 4 |. . . d d d o o o o o o 2374 row 5 |. . . 
 d  d  d  o  o  o  o  o  o
          --------------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix. Note that the d matrix is stored in
   MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.

   Now d_nz should indicate the number of block nonzeros per row in the upper triangular
   plus the diagonal part of the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz. For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

   Level: intermediate

.seealso: `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`
@*/

/* Convenience constructor: creates the matrix, then dispatches on communicator size —
   a single process gets a MATSEQSBAIJ, multiple processes get a MATMPISBAIJ —
   and forwards the preallocation parameters to the matching SetPreallocation routine. */
PetscErrorCode MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
{
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm,A));
  PetscCall(MatSetSizes(*A,m,n,M,N));
  PetscCallMPI(MPI_Comm_size(comm,&size));
  if (size > 1) {
    PetscCall(MatSetType(*A,MATMPISBAIJ));
    PetscCall(MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz));
  } else {
    /* sequential case: the off-diagonal counts o_nz/o_nnz do not apply and are ignored */
    PetscCall(MatSetType(*A,MATSEQSBAIJ));
    PetscCall(MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz));
  }
  PetscFunctionReturn(0);
}

/* Deep copy of an MPISBAIJ matrix: duplicates layouts, the A (diagonal, SeqSBAIJ) and
   B (off-diagonal, SeqBAIJ) parts, and rebuilds/shares the communication scaffolding. */
static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat          mat;
  Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
  PetscInt     len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs;
  PetscScalar  *array;

  PetscFunctionBegin;
  *newmat = NULL;

PetscCall(MatCreate(PetscObjectComm((PetscObject)matin),&mat)); 2425 PetscCall(MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N)); 2426 PetscCall(MatSetType(mat,((PetscObject)matin)->type_name)); 2427 PetscCall(PetscLayoutReference(matin->rmap,&mat->rmap)); 2428 PetscCall(PetscLayoutReference(matin->cmap,&mat->cmap)); 2429 2430 mat->factortype = matin->factortype; 2431 mat->preallocated = PETSC_TRUE; 2432 mat->assembled = PETSC_TRUE; 2433 mat->insertmode = NOT_SET_VALUES; 2434 2435 a = (Mat_MPISBAIJ*)mat->data; 2436 a->bs2 = oldmat->bs2; 2437 a->mbs = oldmat->mbs; 2438 a->nbs = oldmat->nbs; 2439 a->Mbs = oldmat->Mbs; 2440 a->Nbs = oldmat->Nbs; 2441 2442 a->size = oldmat->size; 2443 a->rank = oldmat->rank; 2444 a->donotstash = oldmat->donotstash; 2445 a->roworiented = oldmat->roworiented; 2446 a->rowindices = NULL; 2447 a->rowvalues = NULL; 2448 a->getrowactive = PETSC_FALSE; 2449 a->barray = NULL; 2450 a->rstartbs = oldmat->rstartbs; 2451 a->rendbs = oldmat->rendbs; 2452 a->cstartbs = oldmat->cstartbs; 2453 a->cendbs = oldmat->cendbs; 2454 2455 /* hash table stuff */ 2456 a->ht = NULL; 2457 a->hd = NULL; 2458 a->ht_size = 0; 2459 a->ht_flag = oldmat->ht_flag; 2460 a->ht_fact = oldmat->ht_fact; 2461 a->ht_total_ct = 0; 2462 a->ht_insert_ct = 0; 2463 2464 PetscCall(PetscArraycpy(a->rangebs,oldmat->rangebs,a->size+2)); 2465 if (oldmat->colmap) { 2466 #if defined(PETSC_USE_CTABLE) 2467 PetscCall(PetscTableCreateCopy(oldmat->colmap,&a->colmap)); 2468 #else 2469 PetscCall(PetscMalloc1(a->Nbs,&a->colmap)); 2470 PetscCall(PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt))); 2471 PetscCall(PetscArraycpy(a->colmap,oldmat->colmap,a->Nbs)); 2472 #endif 2473 } else a->colmap = NULL; 2474 2475 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2476 PetscCall(PetscMalloc1(len,&a->garray)); 2477 PetscCall(PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt))); 2478 
PetscCall(PetscArraycpy(a->garray,oldmat->garray,len)); 2479 } else a->garray = NULL; 2480 2481 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash)); 2482 PetscCall(VecDuplicate(oldmat->lvec,&a->lvec)); 2483 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec)); 2484 PetscCall(VecScatterCopy(oldmat->Mvctx,&a->Mvctx)); 2485 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx)); 2486 2487 PetscCall(VecDuplicate(oldmat->slvec0,&a->slvec0)); 2488 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0)); 2489 PetscCall(VecDuplicate(oldmat->slvec1,&a->slvec1)); 2490 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1)); 2491 2492 PetscCall(VecGetLocalSize(a->slvec1,&nt)); 2493 PetscCall(VecGetArray(a->slvec1,&array)); 2494 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a)); 2495 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b)); 2496 PetscCall(VecRestoreArray(a->slvec1,&array)); 2497 PetscCall(VecGetArray(a->slvec0,&array)); 2498 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b)); 2499 PetscCall(VecRestoreArray(a->slvec0,&array)); 2500 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0)); 2501 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1)); 2502 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b)); 2503 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a)); 2504 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b)); 2505 2506 /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */ 2507 PetscCall(PetscObjectReference((PetscObject)oldmat->sMvctx)); 2508 a->sMvctx = oldmat->sMvctx; 2509 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx)); 2510 2511 
  /* duplicate the diagonal (SeqSBAIJ) and off-diagonal (SeqBAIJ) parts; cpvalues controls
     whether the numerical values are copied or only the nonzero pattern */
  PetscCall(MatDuplicate(oldmat->A,cpvalues,&a->A));
  PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A));
  PetscCall(MatDuplicate(oldmat->B,cpvalues,&a->B));
  PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B));
  /* carry over any composed functions (e.g. conversion routines) registered on the original */
  PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist));
  *newmat = mat;
  PetscFunctionReturn(0);
}

/* Used for both MPIBAIJ and MPISBAIJ matrices */
#define MatLoad_MPISBAIJ_Binary MatLoad_MPIBAIJ_Binary

/* Load a matrix from a viewer; only binary viewers are supported, and the actual
   reading is delegated to the shared MPIBAIJ binary loader. */
PetscErrorCode MatLoad_MPISBAIJ(Mat mat,PetscViewer viewer)
{
  PetscBool isbinary;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
  PetscCheck(isbinary,PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)mat)->type_name);
  PetscCall(MatLoad_MPISBAIJ_Binary(mat,viewer));
  PetscFunctionReturn(0);
}

/*XXXXX@
     MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.

   Input Parameters:
.  mat  - the matrix
.
fact - factor

   Not Collective on Mat, each process can have a different hash factor

   Level: advanced

  Notes:
   This can also be set by the command line option: -mat_use_hash_table fact

.seealso: `MatSetOption()`
@XXXXX*/

/* Row-wise maximum absolute value for an MPISBAIJ matrix. Because only the upper triangle
   is stored, each stored off-diagonal entry (i,j) also contributes to row j, which lives
   on a HIGHER-rank process; those contributions are accumulated in `work` and forwarded
   via point-to-point messages below. The idx[] output (column of the max) is unsupported. */
PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
  Mat_SeqBAIJ  *b = (Mat_SeqBAIJ*)(a->B)->data;
  PetscReal    atmp;
  PetscReal    *work,*svalues,*rvalues;
  PetscInt     i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
  PetscMPIInt  rank,size;
  PetscInt     *rowners_bs,dest,count,source;
  PetscScalar  *va;
  MatScalar    *ba;
  MPI_Status   stat;

  PetscFunctionBegin;
  PetscCheck(!idx,PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
  /* start with the row maxima of the diagonal block (SeqSBAIJ handles its own symmetry) */
  PetscCall(MatGetRowMaxAbs(a->A,v,NULL));
  PetscCall(VecGetArray(v,&va));

  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank));

  bs  = A->rmap->bs;
  mbs = a->mbs;
  Mbs = a->Mbs;
  ba  = b->a;   /* raw CSR arrays of the off-diagonal SeqBAIJ part */
  bi  = b->i;
  bj  = b->j;

  /* find ownerships */
  rowners_bs = A->rmap->range;

  /* each proc creates an array to be distributed: global-length scratch of column maxima */
  PetscCall(PetscCalloc1(bs*Mbs,&work));

  /* row_max for B (last rank owns no off-diagonal upper part, so it only receives) */
  if (rank != size-1) {
    for (i=0; i<mbs; i++) {
      ncols = bi[1] - bi[0]; bi++;
      brow  = bs*i;
      for (j=0; j<ncols; j++) {
        bcol = bs*(*bj);
        for (kcol=0; kcol<bs; kcol++) {
          col = bcol + kcol;          /* local col index */
          /* NOTE(review): global index formed by offsetting past this rank's range; assumes
             B's compacted columns all lie beyond the local ownership range, which holds for
             upper-triangular storage — confirm against the garray construction */
          col += rowners_bs[rank+1];  /* global col index */
          for (krow=0; krow<bs; krow++) {
            atmp = PetscAbsScalar(*ba); ba++;
            row  = brow + krow;       /* local row index */
            if (PetscRealPart(va[row]) < atmp) va[row] = atmp;  /* contribution to my row */
            if (work[col] < atmp) work[col] = atmp;             /* contribution to owner of row=col */
          }
        }
        bj++;
      }
    }

    /* send values to its owners */
  /* NOTE(review): tail of the preceding routine (its definition begins above this chunk).
     Sends this rank's accumulated values to all higher ranks, then receives from all
     lower ranks and keeps the elementwise maximum — presumably a MatGetRowMax-style
     global reduction; confirm against the routine header above. */
  for (dest=rank+1; dest<size; dest++) {
    svalues = work + rowners_bs[dest];
    count   = rowners_bs[dest+1]-rowners_bs[dest];
    PetscCallMPI(MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A)));
  }
  }

  /* receive values */
  if (rank) {
    rvalues = work;
    count   = rowners_bs[rank+1]-rowners_bs[rank];
    for (source=0; source<rank; source++) {
      PetscCallMPI(MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat));
      /* process values: keep the larger of the local and the received entry */
      for (i=0; i<count; i++) {
        if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
      }
    }
  }

  PetscCall(VecRestoreArray(v,&va));
  PetscCall(PetscFree(work));
  PetscFunctionReturn(0);
}

/*
   MatSOR_MPISBAIJ - SOR/SSOR relaxation for a parallel symmetric block AIJ matrix.

   Only bs = 1 is supported. Because only the upper triangle of the matrix is stored,
   the off-diagonal contribution B*x + B^T*x is assembled through the shared work
   vectors slvec0/slvec1 (local part slvec0a/slvec1a, ghost part slvec0b/slvec1b)
   and a single ADD_VALUES scatter, then the local diagonal block A does the sweep.

   Input:  matin - the MPISBAIJ matrix; bb - right hand side; omega - relaxation factor;
           flag - MatSORType sweep selector; fshift - diagonal shift; its/lits - global/local
           iteration counts (both must be positive).
   Output: xx - the updated approximate solution.
*/
PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)matin->data;
  PetscInt          mbs=mat->mbs,bs=matin->rmap->bs;
  PetscScalar       *x,*ptr,*from;
  Vec               bb1;
  const PetscScalar *b;

  PetscFunctionBegin;
  PetscCheck(its > 0 && lits > 0,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive",its,lits);
  PetscCheck(bs <= 1,PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");

  /* SOR_APPLY_UPPER is handled entirely by the sequential kernel on the diagonal block */
  if (flag == SOR_APPLY_UPPER) {
    PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
    PetscFunctionReturn(0);
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    /* with a zero initial guess the first sweep needs no off-process contribution */
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx));
      its--;
    }

    PetscCall(VecDuplicate(bb,&bb1));
    while (its--) {

      /* lower triangular part: slvec0b = - B^T*xx */
      PetscCall((*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b));

      /* copy xx into slvec0a (local part of the packed work vector) */
      PetscCall(VecGetArray(mat->slvec0,&ptr));
      PetscCall(VecGetArray(xx,&x));
      PetscCall(PetscArraycpy(ptr,x,bs*mbs));
      PetscCall(VecRestoreArray(mat->slvec0,&ptr));

      /* negate so the scatter below ADDs -x and -B^T*x into slvec1 */
      PetscCall(VecScale(mat->slvec0,-1.0));

      /* copy bb into slvec1a */
      PetscCall(VecGetArray(mat->slvec1,&ptr));
      PetscCall(VecGetArrayRead(bb,&b));
      PetscCall(PetscArraycpy(ptr,b,bs*mbs));
      PetscCall(VecRestoreArray(mat->slvec1,&ptr));

      /* set slvec1b = 0 */
      PetscCall(VecSet(mat->slvec1b,0.0));

      /* slvec1 += scattered(slvec0): slvec1a = bb - (B^T*x contribution), slvec1b = ghost values of -x */
      PetscCall(VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD));
      PetscCall(VecRestoreArray(xx,&x));
      PetscCall(VecRestoreArrayRead(bb,&b));
      PetscCall(VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD));

      /* upper triangular part: bb1 = bb1 - B*x */
      PetscCall((*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1));

      /* local diagonal sweep */
      PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx));
    }
    PetscCall(VecDestroy(&bb1));
  } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
    PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
  } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
    PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
  } else if (flag & SOR_EISENSTAT) {
    /* Eisenstat's trick: one backward sweep, a diagonal correction, the off-process
       product, then one forward sweep; results are accumulated into xx */
    Vec               xx1;
    PetscBool         hasop;
    const PetscScalar *diag;
    PetscScalar       *sl,scale = (omega - 2.0)/omega;
    PetscInt          i,n;

    /* work vectors are cached on the matrix and created lazily on first use */
    if (!mat->xx1) {
      PetscCall(VecDuplicate(bb,&mat->xx1));
      PetscCall(VecDuplicate(bb,&mat->bb1));
    }
    xx1 = mat->xx1;
    bb1 = mat->bb1;

    PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx));

    if (!mat->diag) {
      /* this is wrong for same matrix with new nonzero values */
      PetscCall(MatCreateVecs(matin,&mat->diag,NULL));
      PetscCall(MatGetDiagonal(matin,mat->diag));
    }
    PetscCall(MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop));

    if (hasop) {
      /* NOTE(review): result of MatMultDiagonalBlock lands in bb1 while VecAYPX updates
         slvec1a — this matches upstream PETSc but looks inconsistent with the manual
         branch below; verify against the PETSc repository before changing */
      PetscCall(MatMultDiagonalBlock(matin,xx,bb1));
      PetscCall(VecAYPX(mat->slvec1a,scale,bb));
    } else {
      /*
        These two lines are replaced by code that may be a bit faster for a good compiler
        PetscCall(VecPointwiseMult(mat->slvec1a,mat->diag,xx));
        PetscCall(VecAYPX(mat->slvec1a,scale,bb));
      */
      PetscCall(VecGetArray(mat->slvec1a,&sl));
      PetscCall(VecGetArrayRead(mat->diag,&diag));
      PetscCall(VecGetArrayRead(bb,&b));
      PetscCall(VecGetArray(xx,&x));
      PetscCall(VecGetLocalSize(xx,&n));
      if (omega == 1.0) {
        /* scale = -1 when omega == 1, so the multiply by scale can be folded into a subtraction */
        for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i];
        PetscCall(PetscLogFlops(2.0*n));
      } else {
        for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i];
        PetscCall(PetscLogFlops(3.0*n));
      }
      PetscCall(VecRestoreArray(mat->slvec1a,&sl));
      PetscCall(VecRestoreArrayRead(mat->diag,&diag));
      PetscCall(VecRestoreArrayRead(bb,&b));
      PetscCall(VecRestoreArray(xx,&x));
    }

    /* multiply off-diagonal portion of matrix */
    PetscCall(VecSet(mat->slvec1b,0.0));
    PetscCall((*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b));
    PetscCall(VecGetArray(mat->slvec0,&from));
    PetscCall(VecGetArray(xx,&x));
    PetscCall(PetscArraycpy(from,x,bs*mbs));
    PetscCall(VecRestoreArray(mat->slvec0,&from));
    PetscCall(VecRestoreArray(xx,&x));
    PetscCall(VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD));
    PetscCall(VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD));
    PetscCall((*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a));

    /* local sweep */
    PetscCall((*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1));
    PetscCall(VecAXPY(xx,1.0,xx1));
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
  PetscFunctionReturn(0);
}

/*@
   MatCreateMPISBAIJWithArrays - creates a MPI SBAIJ matrix using arrays that contain in standard
         CSR format the local rows.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  bs - the block size, only a block size of 1 is supported
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that row block row of the matrix
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
       The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of a[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

       The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
2793 2794 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 2795 `MPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()` 2796 @*/ 2797 PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 2798 { 2799 PetscFunctionBegin; 2800 PetscCheck(!i[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 2801 PetscCheck(m >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 2802 PetscCall(MatCreate(comm,mat)); 2803 PetscCall(MatSetSizes(*mat,m,n,M,N)); 2804 PetscCall(MatSetType(*mat,MATMPISBAIJ)); 2805 PetscCall(MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a)); 2806 PetscFunctionReturn(0); 2807 } 2808 2809 /*@C 2810 MatMPISBAIJSetPreallocationCSR - Creates a sparse parallel matrix in SBAIJ format using the given nonzero structure and (optional) numerical values 2811 2812 Collective 2813 2814 Input Parameters: 2815 + B - the matrix 2816 . bs - the block size 2817 . i - the indices into j for the start of each local row (starts with zero) 2818 . 
j - the column indices for each local row (starts with zero) these must be sorted for each row 2819 - v - optional values in the matrix 2820 2821 Level: advanced 2822 2823 Notes: 2824 Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries 2825 and usually the numerical values as well 2826 2827 Any entries below the diagonal are ignored 2828 2829 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MPIAIJ` 2830 @*/ 2831 PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2832 { 2833 PetscFunctionBegin; 2834 PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v)); 2835 PetscFunctionReturn(0); 2836 } 2837 2838 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 2839 { 2840 PetscInt m,N,i,rstart,nnz,Ii,bs,cbs; 2841 PetscInt *indx; 2842 PetscScalar *values; 2843 2844 PetscFunctionBegin; 2845 PetscCall(MatGetSize(inmat,&m,&N)); 2846 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 2847 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inmat->data; 2848 PetscInt *dnz,*onz,mbs,Nbs,nbs; 2849 PetscInt *bindx,rmax=a->rmax,j; 2850 PetscMPIInt rank,size; 2851 2852 PetscCall(MatGetBlockSizes(inmat,&bs,&cbs)); 2853 mbs = m/bs; Nbs = N/cbs; 2854 if (n == PETSC_DECIDE) { 2855 PetscCall(PetscSplitOwnershipBlock(comm,cbs,&n,&N)); 2856 } 2857 nbs = n/cbs; 2858 2859 PetscCall(PetscMalloc1(rmax,&bindx)); 2860 MatPreallocateBegin(comm,mbs,nbs,dnz,onz); /* inline function, output __end and __rstart are used below */ 2861 2862 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 2863 PetscCallMPI(MPI_Comm_rank(comm,&size)); 2864 if (rank == size-1) { 2865 /* Check sum(nbs) = Nbs */ 2866 PetscCheck(__end == Nbs,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local block columns 
%" PetscInt_FMT " != global block columns %" PetscInt_FMT,__end,Nbs); 2867 } 2868 2869 rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */ 2870 PetscCall(MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE)); 2871 for (i=0; i<mbs; i++) { 2872 PetscCall(MatGetRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL)); /* non-blocked nnz and indx */ 2873 nnz = nnz/bs; 2874 for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs; 2875 PetscCall(MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz)); 2876 PetscCall(MatRestoreRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL)); 2877 } 2878 PetscCall(MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE)); 2879 PetscCall(PetscFree(bindx)); 2880 2881 PetscCall(MatCreate(comm,outmat)); 2882 PetscCall(MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE)); 2883 PetscCall(MatSetBlockSizes(*outmat,bs,cbs)); 2884 PetscCall(MatSetType(*outmat,MATSBAIJ)); 2885 PetscCall(MatSeqSBAIJSetPreallocation(*outmat,bs,0,dnz)); 2886 PetscCall(MatMPISBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz)); 2887 MatPreallocateEnd(dnz,onz); 2888 } 2889 2890 /* numeric phase */ 2891 PetscCall(MatGetBlockSizes(inmat,&bs,&cbs)); 2892 PetscCall(MatGetOwnershipRange(*outmat,&rstart,NULL)); 2893 2894 PetscCall(MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE)); 2895 for (i=0; i<m; i++) { 2896 PetscCall(MatGetRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values)); 2897 Ii = i + rstart; 2898 PetscCall(MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES)); 2899 PetscCall(MatRestoreRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values)); 2900 } 2901 PetscCall(MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE)); 2902 PetscCall(MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY)); 2903 PetscCall(MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY)); 2904 PetscFunctionReturn(0); 2905 } 2906