1 2 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/ 3 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 4 #include <../src/mat/impls/sbaij/seq/sbaij.h> 5 #include <petscblaslapack.h> 6 7 #if defined(PETSC_HAVE_ELEMENTAL) 8 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 9 #endif 10 #if defined(PETSC_HAVE_SCALAPACK) 11 PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*); 12 #endif 13 14 /* This could be moved to matimpl.h */ 15 static PetscErrorCode MatPreallocateWithMats_Private(Mat B, PetscInt nm, Mat X[], PetscBool symm[], PetscBool fill) 16 { 17 Mat preallocator; 18 PetscInt r,rstart,rend; 19 PetscInt bs,i,m,n,M,N; 20 PetscBool cong = PETSC_TRUE; 21 22 PetscFunctionBegin; 23 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 24 PetscValidLogicalCollectiveInt(B,nm,2); 25 for (i = 0; i < nm; i++) { 26 PetscValidHeaderSpecific(X[i],MAT_CLASSID,3); 27 PetscCall(PetscLayoutCompare(B->rmap,X[i]->rmap,&cong)); 28 PetscCheck(cong,PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"Not for different layouts"); 29 } 30 PetscValidLogicalCollectiveBool(B,fill,5); 31 PetscCall(MatGetBlockSize(B,&bs)); 32 PetscCall(MatGetSize(B,&M,&N)); 33 PetscCall(MatGetLocalSize(B,&m,&n)); 34 PetscCall(MatCreate(PetscObjectComm((PetscObject)B),&preallocator)); 35 PetscCall(MatSetType(preallocator,MATPREALLOCATOR)); 36 PetscCall(MatSetBlockSize(preallocator,bs)); 37 PetscCall(MatSetSizes(preallocator,m,n,M,N)); 38 PetscCall(MatSetUp(preallocator)); 39 PetscCall(MatGetOwnershipRange(preallocator,&rstart,&rend)); 40 for (r = rstart; r < rend; ++r) { 41 PetscInt ncols; 42 const PetscInt *row; 43 const PetscScalar *vals; 44 45 for (i = 0; i < nm; i++) { 46 PetscCall(MatGetRow(X[i],r,&ncols,&row,&vals)); 47 PetscCall(MatSetValues(preallocator,1,&r,ncols,row,vals,INSERT_VALUES)); 48 if (symm && symm[i]) { 49 PetscCall(MatSetValues(preallocator,ncols,row,1,&r,vals,INSERT_VALUES)); 50 } 51 PetscCall(MatRestoreRow(X[i],r,&ncols,&row,&vals)); 52 } 53 } 54 PetscCall(MatAssemblyBegin(preallocator,MAT_FINAL_ASSEMBLY)); 55 PetscCall(MatAssemblyEnd(preallocator,MAT_FINAL_ASSEMBLY)); 56 PetscCall(MatPreallocatorPreallocate(preallocator,fill,B)); 57 PetscCall(MatDestroy(&preallocator)); 58 PetscFunctionReturn(0); 59 } 60 61 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Basic(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 62 { 63 Mat B; 64 PetscInt r; 65 66 PetscFunctionBegin; 67 if (reuse != MAT_REUSE_MATRIX) { 68 PetscBool symm = PETSC_TRUE,isdense; 69 PetscInt bs; 70 71 PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B)); 72 PetscCall(MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N)); 73 PetscCall(MatSetType(B,newtype)); 74 PetscCall(MatGetBlockSize(A,&bs)); 75 PetscCall(MatSetBlockSize(B,bs)); 76 PetscCall(PetscLayoutSetUp(B->rmap)); 77 PetscCall(PetscLayoutSetUp(B->cmap)); 78 PetscCall(PetscObjectTypeCompareAny((PetscObject)B,&isdense,MATSEQDENSE,MATMPIDENSE,MATSEQDENSECUDA,"")); 79 if (!isdense) { 80 PetscCall(MatGetRowUpperTriangular(A)); 81 PetscCall(MatPreallocateWithMats_Private(B,1,&A,&symm,PETSC_TRUE)); 82 PetscCall(MatRestoreRowUpperTriangular(A)); 83 } else { 84 PetscCall(MatSetUp(B)); 85 } 86 } else { 87 B = *newmat; 88 PetscCall(MatZeroEntries(B)); 89 } 90 91 PetscCall(MatGetRowUpperTriangular(A)); 92 for (r = A->rmap->rstart; r < A->rmap->rend; r++) { 93 PetscInt ncols; 94 const PetscInt *row; 95 const PetscScalar *vals; 96 97 PetscCall(MatGetRow(A,r,&ncols,&row,&vals)); 98 
PetscCall(MatSetValues(B,1,&r,ncols,row,vals,INSERT_VALUES)); 99 #if defined(PETSC_USE_COMPLEX) 100 if (A->hermitian) { 101 PetscInt i; 102 for (i = 0; i < ncols; i++) { 103 PetscCall(MatSetValue(B,row[i],r,PetscConj(vals[i]),INSERT_VALUES)); 104 } 105 } else { 106 PetscCall(MatSetValues(B,ncols,row,1,&r,vals,INSERT_VALUES)); 107 } 108 #else 109 PetscCall(MatSetValues(B,ncols,row,1,&r,vals,INSERT_VALUES)); 110 #endif 111 PetscCall(MatRestoreRow(A,r,&ncols,&row,&vals)); 112 } 113 PetscCall(MatRestoreRowUpperTriangular(A)); 114 PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY)); 115 PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY)); 116 117 if (reuse == MAT_INPLACE_MATRIX) { 118 PetscCall(MatHeaderReplace(A,&B)); 119 } else { 120 *newmat = B; 121 } 122 PetscFunctionReturn(0); 123 } 124 125 PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat) 126 { 127 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ*)mat->data; 128 129 PetscFunctionBegin; 130 PetscCall(MatStoreValues(aij->A)); 131 PetscCall(MatStoreValues(aij->B)); 132 PetscFunctionReturn(0); 133 } 134 135 PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat) 136 { 137 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ*)mat->data; 138 139 PetscFunctionBegin; 140 PetscCall(MatRetrieveValues(aij->A)); 141 PetscCall(MatRetrieveValues(aij->B)); 142 PetscFunctionReturn(0); 143 } 144 145 #define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,orow,ocol) \ 146 { \ 147 brow = row/bs; \ 148 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 149 rmax = aimax[brow]; nrow = ailen[brow]; \ 150 bcol = col/bs; \ 151 ridx = row % bs; cidx = col % bs; \ 152 low = 0; high = nrow; \ 153 while (high-low > 3) { \ 154 t = (low+high)/2; \ 155 if (rp[t] > bcol) high = t; \ 156 else low = t; \ 157 } \ 158 for (_i=low; _i<high; _i++) { \ 159 if (rp[_i] > bcol) break; \ 160 if (rp[_i] == bcol) { \ 161 bap = ap + bs2*_i + bs*cidx + ridx; \ 162 if (addv == ADD_VALUES) *bap += value; \ 163 else *bap = value; \ 164 goto a_noinsert; \ 165 } \ 166 } \ 167 if (a->nonew == 1) goto a_noinsert; \ 168 PetscCheck(a->nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 169 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 170 N = nrow++ - 1; \ 171 /* shift up all the later entries in this row */ \ 172 PetscCall(PetscArraymove(rp+_i+1,rp+_i,N-_i+1)); \ 173 PetscCall(PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1))); \ 174 PetscCall(PetscArrayzero(ap+bs2*_i,bs2)); \ 175 rp[_i] = bcol; \ 176 ap[bs2*_i + bs*cidx + ridx] = value; \ 177 A->nonzerostate++;\ 178 a_noinsert:; \ 179 ailen[brow] = nrow; \ 180 } 181 182 #define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,orow,ocol) \ 183 { \ 184 brow = row/bs; \ 185 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 186 rmax = bimax[brow]; nrow = bilen[brow]; \ 187 bcol = col/bs; \ 188 ridx = row % bs; cidx = col % bs; \ 189 low = 0; high = nrow; \ 190 while (high-low > 3) { \ 191 t = (low+high)/2; \ 192 if (rp[t] > bcol) high = t; \ 193 else low = t; \ 194 } \ 195 for (_i=low; _i<high; _i++) { \ 196 if (rp[_i] > bcol) break; \ 197 if (rp[_i] == bcol) { \ 198 bap = ap + bs2*_i + bs*cidx + ridx; \ 199 if (addv == ADD_VALUES) *bap += value; \ 200 else *bap = value; \ 201 goto b_noinsert; \ 202 } \ 203 } \ 204 if (b->nonew == 1) goto b_noinsert; \ 205 PetscCheck(b->nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, 
ocol); \
    MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp+_i+1,rp+_i,N-_i+1)); \
    PetscCall(PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1))); \
    PetscCall(PetscArrayzero(ap+bs2*_i,bs2)); \
    rp[_i] = bcol; \
    ap[bs2*_i + bs*cidx + ridx] = value; \
    B->nonzerostate++; \
    b_noinsert:; \
    bilen[brow] = nrow; \
  }
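/*
   Usage sketch (illustration only; "mat" is a hypothetical, already preallocated MATMPISBAIJ
   with block size 1): callers pass only entries whose row is <= column; lower-triangular
   entries are dropped when MAT_IGNORE_LOWER_TRIANGULAR is set and generate an error otherwise.

     PetscInt    row     = 0, cols[2] = {0, 2};
     PetscScalar vals[2] = {4.0, -1.0};
     PetscCall(MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE));
     PetscCall(MatSetValues(mat,1,&row,2,cols,vals,INSERT_VALUES));
     PetscCall(MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY));
     PetscCall(MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY));
*/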
/* Only add/insert a(i,j) with i<=j (blocks).
   Any a(i,j) with i>j input by user is ignored or generates an error
*/
PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ *baij        = (Mat_MPISBAIJ*)mat->data;
  MatScalar    value;
  PetscBool    roworiented  = baij->roworiented;
  PetscInt     i,j,row,col;
  PetscInt     rstart_orig = mat->rmap->rstart;
  PetscInt     rend_orig   = mat->rmap->rend,cstart_orig = mat->cmap->rstart;
  PetscInt     cend_orig   = mat->cmap->rend,bs = mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat          A      = baij->A;
  Mat_SeqSBAIJ *a     = (Mat_SeqSBAIJ*)(A)->data;
  PetscInt     *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar    *aa    = a->a;

  Mat         B      = baij->B;
  Mat_SeqBAIJ *b     = (Mat_SeqBAIJ*)(B)->data;
  PetscInt    *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j;
  MatScalar   *ba    = b->a;

  PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt  low,high,t,ridx,cidx,bs2 = a->bs2;
  MatScalar *ap,*bap;

  /* for stash */
  PetscInt  n_loc, *in_loc = NULL;
  MatScalar *v_loc = NULL;

  PetscFunctionBegin;
  if (!baij->donotstash) {
    if (n > baij->n_loc) {
      PetscCall(PetscFree(baij->in_loc));
      PetscCall(PetscFree(baij->v_loc));
      PetscCall(PetscMalloc1(n,&baij->in_loc));
      PetscCall(PetscMalloc1(n,&baij->v_loc));

      baij->n_loc = n;
    }
    in_loc = baij->in_loc;
    v_loc  = baij->v_loc;
  }

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
    PetscCheck(im[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1);
    if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
      row = im[i] - rstart_orig;                     /* local row index */
      for (j=0; j<n; j++) {
        if (im[i]/bs > in[j]/bs) {
          if (a->ignore_ltriangular) {
            continue; /* ignore lower triangular blocks */
          } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
        }
        if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */
          col  = in[j] - cstart_orig; /* local col index */
          brow = row/bs; bcol = col/bs;
          if (brow > bcol) continue; /* ignore lower triangular blocks of A */
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,im[i],in[j]);
          /* PetscCall(MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv)); */
        } else if (in[j] < 0) continue;
        else { /* off-diag entry (B) */
          PetscCheck(in[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[j],mat->cmap->N-1);
          if (mat->was_assembled) {
            if (!baij->colmap) {
              PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
            }
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscTableFind(baij->colmap,in[j]/bs + 1,&col));
            col = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
              PetscCall(MatDisAssemble_MPISBAIJ(mat));
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B     = baij->B;
              b     = (Mat_SeqBAIJ*)(B)->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              ba    = b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,im[i],in[j]);
          /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */
        }
      }
    } else { /* off processor entry */
      PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        n_loc          = 0;
        for (j=0; j<n; j++) {
          if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
          in_loc[n_loc] = in[j];
          if (roworiented) {
            v_loc[n_loc] = v[i*n+j];
          } else {
            v_loc[n_loc] = v[j*m+i];
          }
          n_loc++;
        }
        PetscCall(MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE));
      }
    }
  }
  PetscFunctionReturn(0);
}

static inline PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
{
  Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
  PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
  PetscInt          *imax = a->imax,*ai = a->i,*ailen = a->ilen;
  PetscInt          *aj = a->j,nonew = a->nonew,bs2 = a->bs2,bs = A->rmap->bs;
  PetscBool         roworiented = a->roworiented;
  const PetscScalar *value = v;
  MatScalar         *ap,*aa = a->a,*bap;

  PetscFunctionBegin;
  if (col < row) {
    if (a->ignore_ltriangular) PetscFunctionReturn(0); /* ignore lower triangular block */
    else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format.
Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 349 } 350 rp = aj + ai[row]; 351 ap = aa + bs2*ai[row]; 352 rmax = imax[row]; 353 nrow = ailen[row]; 354 value = v; 355 low = 0; 356 high = nrow; 357 358 while (high-low > 7) { 359 t = (low+high)/2; 360 if (rp[t] > col) high = t; 361 else low = t; 362 } 363 for (i=low; i<high; i++) { 364 if (rp[i] > col) break; 365 if (rp[i] == col) { 366 bap = ap + bs2*i; 367 if (roworiented) { 368 if (is == ADD_VALUES) { 369 for (ii=0; ii<bs; ii++) { 370 for (jj=ii; jj<bs2; jj+=bs) { 371 bap[jj] += *value++; 372 } 373 } 374 } else { 375 for (ii=0; ii<bs; ii++) { 376 for (jj=ii; jj<bs2; jj+=bs) { 377 bap[jj] = *value++; 378 } 379 } 380 } 381 } else { 382 if (is == ADD_VALUES) { 383 for (ii=0; ii<bs; ii++) { 384 for (jj=0; jj<bs; jj++) { 385 *bap++ += *value++; 386 } 387 } 388 } else { 389 for (ii=0; ii<bs; ii++) { 390 for (jj=0; jj<bs; jj++) { 391 *bap++ = *value++; 392 } 393 } 394 } 395 } 396 goto noinsert2; 397 } 398 } 399 if (nonew == 1) goto noinsert2; 400 PetscCheck(nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new block index nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol); 401 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 402 N = nrow++ - 1; high++; 403 /* shift up all the later entries in this row */ 404 PetscCall(PetscArraymove(rp+i+1,rp+i,N-i+1)); 405 PetscCall(PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1))); 406 rp[i] = col; 407 bap = ap + bs2*i; 408 if (roworiented) { 409 for (ii=0; ii<bs; ii++) { 410 for (jj=ii; jj<bs2; jj+=bs) { 411 bap[jj] = *value++; 412 } 413 } 414 } else { 415 for (ii=0; ii<bs; ii++) { 416 for (jj=0; jj<bs; jj++) { 417 *bap++ = *value++; 418 } 419 } 420 } 421 noinsert2:; 422 ailen[row] = nrow; 423 PetscFunctionReturn(0); 424 } 425 426 /* 427 This routine is exactly duplicated in mpibaij.c 428 */ 429 static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol) 430 { 431 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 432 PetscInt *rp,low,high,t,ii,jj,nrow,i,rmax,N; 433 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 434 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs; 435 PetscBool roworiented=a->roworiented; 436 const PetscScalar *value = v; 437 MatScalar *ap,*aa = a->a,*bap; 438 439 PetscFunctionBegin; 440 rp = aj + ai[row]; 441 ap = aa + bs2*ai[row]; 442 rmax = imax[row]; 443 nrow = ailen[row]; 444 low = 0; 445 high = nrow; 446 value = v; 447 while (high-low > 7) { 448 t = (low+high)/2; 449 if (rp[t] > col) high = t; 450 else low = t; 451 } 452 for (i=low; i<high; i++) { 453 if (rp[i] > col) break; 454 if (rp[i] == col) { 455 bap = ap + bs2*i; 456 if (roworiented) { 457 if (is == ADD_VALUES) { 458 for (ii=0; ii<bs; ii++) { 459 for (jj=ii; jj<bs2; jj+=bs) { 460 bap[jj] += *value++; 461 } 462 } 463 } else { 464 for (ii=0; ii<bs; ii++) { 465 for (jj=ii; jj<bs2; jj+=bs) { 466 bap[jj] = *value++; 467 } 468 } 469 } 470 } else { 471 if (is == ADD_VALUES) { 472 for (ii=0; ii<bs; ii++,value+=bs) { 473 for (jj=0; jj<bs; jj++) { 474 bap[jj] += value[jj]; 475 } 476 bap += bs; 477 } 478 } else { 479 for (ii=0; ii<bs; ii++,value+=bs) { 480 for (jj=0; jj<bs; jj++) { 481 bap[jj] = value[jj]; 482 } 483 bap += bs; 484 } 485 } 486 } 487 goto noinsert2; 488 } 489 } 490 if (nonew == 1) goto noinsert2; 491 PetscCheck(nonew != 
-1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol); 492 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 493 N = nrow++ - 1; high++; 494 /* shift up all the later entries in this row */ 495 PetscCall(PetscArraymove(rp+i+1,rp+i,N-i+1)); 496 PetscCall(PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1))); 497 rp[i] = col; 498 bap = ap + bs2*i; 499 if (roworiented) { 500 for (ii=0; ii<bs; ii++) { 501 for (jj=ii; jj<bs2; jj+=bs) { 502 bap[jj] = *value++; 503 } 504 } 505 } else { 506 for (ii=0; ii<bs; ii++) { 507 for (jj=0; jj<bs; jj++) { 508 *bap++ = *value++; 509 } 510 } 511 } 512 noinsert2:; 513 ailen[row] = nrow; 514 PetscFunctionReturn(0); 515 } 516 517 /* 518 This routine could be optimized by removing the need for the block copy below and passing stride information 519 to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ() 520 */ 521 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 522 { 523 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 524 const MatScalar *value; 525 MatScalar *barray =baij->barray; 526 PetscBool roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular; 527 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 528 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 529 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 530 531 PetscFunctionBegin; 532 if (!barray) { 533 PetscCall(PetscMalloc1(bs2,&barray)); 534 baij->barray = barray; 535 } 536 537 if (roworiented) { 538 stepval = (n-1)*bs; 539 } else { 540 stepval = (m-1)*bs; 541 } 542 for (i=0; i<m; i++) { 543 if (im[i] < 0) continue; 544 PetscCheck(im[i] < baij->Mbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT,im[i],baij->Mbs-1); 545 if (im[i] >= rstart && im[i] < rend) { 546 row = im[i] - rstart; 547 for (j=0; j<n; j++) { 548 if (im[i] > in[j]) { 549 if (ignore_ltriangular) continue; /* ignore lower triangular blocks */ 550 else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. 
Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
        }
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*) v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*) v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++ = *value++;
            }
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {
          col = in[j] - cstart;
          PetscCall(MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]));
        } else if (in[j] < 0) continue;
        else {
          PetscCheck(in[j] < baij->Nbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT,in[j],baij->Nbs-1);
          if (mat->was_assembled) {
            if (!baij->colmap) {
              PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
            }

#if defined(PETSC_USE_DEBUG)
#if defined(PETSC_USE_CTABLE)
            { PetscInt data;
              PetscCall(PetscTableFind(baij->colmap,in[j]+1,&data));
              PetscCheck((data - 1) % bs == 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            PetscCheck((baij->colmap[in[j]] - 1) % bs == 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscTableFind(baij->colmap,in[j]+1,&col));
            col = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              PetscCall(MatDisAssemble_MPISBAIJ(mat));
              col = in[j];
            }
          } else col = in[j];
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]));
        }
      }
    } else {
      PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i));
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
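/*
   Usage sketch for the blocked interface (illustration only; "mat" is a hypothetical matrix
   created with MatCreateSBAIJ() and block size 2): one bs*bs dense block is passed per
   (block row, block col) pair, row-oriented by default; blocks with block row > block col
   are ignored or generate an error, exactly as in the scalar interface.

     PetscInt    brow = 0, bcol = 1;
     PetscScalar block[4] = { 2.0, -1.0,
                             -1.0,  2.0};
     PetscCall(MatSetValuesBlocked(mat,1,&brow,1,&bcol,block,ADD_VALUES));
*/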
PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
  PetscInt     bs = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt     bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          col = idxn[j] - bscstart;
          PetscCall(MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j));
        } else {
          if (!baij->colmap) {
            PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
          }
#if defined(PETSC_USE_CTABLE)
          PetscCall(PetscTableFind(baij->colmap,idxn[j]/bs+1,&data));
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col = data + idxn[j]%bs;
            PetscCall(MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j));
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
  PetscReal    sum[2],*lnorm2;

  PetscFunctionBegin;
  if (baij->size == 1) {
    PetscCall(MatNorm(baij->A,type,norm));
  } else {
    if (type == NORM_FROBENIUS) {
      PetscCall(PetscMalloc1(2,&lnorm2));
      PetscCall(MatNorm(baij->A,type,lnorm2));
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++; /* square of norm(A) */
      PetscCall(MatNorm(baij->B,type,lnorm2));
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--; /* square of norm(B) */
      PetscCall(MPIU_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat)));
      *norm = PetscSqrtReal(sum[0] + 2*sum[1]);
      PetscCall(PetscFree(lnorm2));
    } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
      Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ*)baij->A->data;
      Mat_SeqBAIJ  *bmat = (Mat_SeqBAIJ*)baij->B->data;
      PetscReal    *rsum,*rsum2,vabs;
      PetscInt     *jj,*garray = baij->garray,rstart = baij->rstartbs,nz;
      PetscInt     brow,bcol,col,bs = baij->A->rmap->bs,row,grow,gcol,mbs = amat->mbs;
      MatScalar    *v;

      PetscCall(PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2));
      PetscCall(PetscArrayzero(rsum,mat->cmap->N));
      /* Amat */
      v = amat->a; jj = amat->j;
      for (brow=0; brow<mbs; brow++) {
        grow = bs*(rstart + brow);
        nz   = amat->i[brow+1] - amat->i[brow];
        for (bcol=0; bcol<nz; bcol++) {
          gcol = bs*(rstart + *jj); jj++;
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              vabs = PetscAbsScalar(*v); v++;
              rsum[gcol+col] += vabs;
              /* non-diagonal block */
              if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
            }
          }
        }
        PetscCall(PetscLogFlops(nz*bs*bs));
      }
      /* Bmat */
      v = bmat->a; jj = bmat->j;
      for (brow=0; brow<mbs; brow++) {
        grow = bs*(rstart + brow);
        nz   = bmat->i[brow+1] - bmat->i[brow];
        for (bcol=0; bcol<nz; bcol++) {
          gcol = bs*garray[*jj]; jj++;
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              vabs = PetscAbsScalar(*v); v++;
              rsum[gcol+col] += vabs;
              rsum[grow+row] += vabs;
            }
          }
        }
        PetscCall(PetscLogFlops(nz*bs*bs));
      }
      PetscCall(MPIU_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat)));
      *norm = 0.0;
      for (col=0; col<mat->cmap->N; col++) {
        if (rsum2[col] > *norm) *norm = rsum2[col];
      }
      PetscCall(PetscFree2(rsum,rsum2));
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
  }
  PetscFunctionReturn(0);
}
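/*
   Assembly notes (descriptive comment): values destined for rows owned by other ranks are
   accumulated in mat->stash (scalar interface) and mat->bstash (blocked interface) by the
   MatSetValues kernels above. MatAssemblyBegin below starts the communication of both stashes;
   MatAssemblyEnd drains the received messages and re-inserts them locally through
   MatSetValues_MPISBAIJ() / MatSetValuesBlocked_MPISBAIJ().
*/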
PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
  PetscInt     nstash,reallocs;

  PetscFunctionBegin;
  if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  PetscCall(MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range));
  PetscCall(MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs));
  PetscCall(MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs));
  PetscCall(PetscInfo(mat,"Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs));
  PetscCall(MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs));
  PetscCall(PetscInfo(mat,"Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs));
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
  Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ*)baij->A->data;
  PetscInt     i,j,rstart,ncols,flg,bs2 = baij->bs2;
  PetscInt     *row,*col;
  PetscBool    other_disassembled;
  PetscMPIInt  n;
  PetscBool    r1,r2,r3;
  MatScalar    *val;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!baij->donotstash && !mat->nooffprocentries) {
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg));
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg));
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        PetscCall(MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->bstash));

    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  PetscCall(MatAssemblyBegin(baij->A,mode));
  PetscCall(MatAssemblyEnd(baij->A,mode));

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble.
*/ 821 /* 822 if nonzero structure of submatrix B cannot change then we know that 823 no processor disassembled thus we can skip this stuff 824 */ 825 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 826 PetscCall(MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat))); 827 if (mat->was_assembled && !other_disassembled) { 828 PetscCall(MatDisAssemble_MPISBAIJ(mat)); 829 } 830 } 831 832 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 833 PetscCall(MatSetUpMultiply_MPISBAIJ(mat)); /* setup Mvctx and sMvctx */ 834 } 835 PetscCall(MatAssemblyBegin(baij->B,mode)); 836 PetscCall(MatAssemblyEnd(baij->B,mode)); 837 838 PetscCall(PetscFree2(baij->rowvalues,baij->rowindices)); 839 840 baij->rowvalues = NULL; 841 842 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 843 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 844 PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate; 845 PetscCall(MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat))); 846 } 847 PetscFunctionReturn(0); 848 } 849 850 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 851 #include <petscdraw.h> 852 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 853 { 854 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 855 PetscInt bs = mat->rmap->bs; 856 PetscMPIInt rank = baij->rank; 857 PetscBool iascii,isdraw; 858 PetscViewer sviewer; 859 PetscViewerFormat format; 860 861 PetscFunctionBegin; 862 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii)); 863 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw)); 864 if (iascii) { 865 PetscCall(PetscViewerGetFormat(viewer,&format)); 866 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 867 MatInfo info; 868 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank)); 869 PetscCall(MatGetInfo(mat,MAT_LOCAL,&info)); 870 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 871 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(double)info.memory)); 872 PetscCall(MatGetInfo(baij->A,MAT_LOCAL,&info)); 873 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used)); 874 PetscCall(MatGetInfo(baij->B,MAT_LOCAL,&info)); 875 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used)); 876 PetscCall(PetscViewerFlush(viewer)); 877 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 878 PetscCall(PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n")); 879 PetscCall(VecScatterView(baij->Mvctx,viewer)); 880 PetscFunctionReturn(0); 881 } else if (format == PETSC_VIEWER_ASCII_INFO) { 882 PetscCall(PetscViewerASCIIPrintf(viewer," block size is %" PetscInt_FMT "\n",bs)); 883 PetscFunctionReturn(0); 884 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 885 PetscFunctionReturn(0); 886 } 887 } 888 889 if (isdraw) { 890 PetscDraw draw; 891 PetscBool isnull; 892 PetscCall(PetscViewerDrawGetDraw(viewer,0,&draw)); 893 
PetscCall(PetscDrawIsNull(draw,&isnull)); 894 if (isnull) PetscFunctionReturn(0); 895 } 896 897 { 898 /* assemble the entire matrix onto first processor. */ 899 Mat A; 900 Mat_SeqSBAIJ *Aloc; 901 Mat_SeqBAIJ *Bloc; 902 PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 903 MatScalar *a; 904 const char *matname; 905 906 /* Should this be the same type as mat? */ 907 PetscCall(MatCreate(PetscObjectComm((PetscObject)mat),&A)); 908 if (rank == 0) { 909 PetscCall(MatSetSizes(A,M,N,M,N)); 910 } else { 911 PetscCall(MatSetSizes(A,0,0,M,N)); 912 } 913 PetscCall(MatSetType(A,MATMPISBAIJ)); 914 PetscCall(MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL)); 915 PetscCall(MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE)); 916 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)A)); 917 918 /* copy over the A part */ 919 Aloc = (Mat_SeqSBAIJ*)baij->A->data; 920 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 921 PetscCall(PetscMalloc1(bs,&rvals)); 922 923 for (i=0; i<mbs; i++) { 924 rvals[0] = bs*(baij->rstartbs + i); 925 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 926 for (j=ai[i]; j<ai[i+1]; j++) { 927 col = (baij->cstartbs+aj[j])*bs; 928 for (k=0; k<bs; k++) { 929 PetscCall(MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES)); 930 col++; 931 a += bs; 932 } 933 } 934 } 935 /* copy over the B part */ 936 Bloc = (Mat_SeqBAIJ*)baij->B->data; 937 ai = Bloc->i; aj = Bloc->j; a = Bloc->a; 938 for (i=0; i<mbs; i++) { 939 940 rvals[0] = bs*(baij->rstartbs + i); 941 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 942 for (j=ai[i]; j<ai[i+1]; j++) { 943 col = baij->garray[aj[j]]*bs; 944 for (k=0; k<bs; k++) { 945 PetscCall(MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES)); 946 col++; 947 a += bs; 948 } 949 } 950 } 951 PetscCall(PetscFree(rvals)); 952 PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY)); 953 PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY)); 954 /* 955 Everyone has to call to draw the matrix since the graphics waits are 956 synchronized across all processors that share the PetscDraw object 957 */ 958 PetscCall(PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer)); 959 PetscCall(PetscObjectGetName((PetscObject)mat,&matname)); 960 if (rank == 0) { 961 PetscCall(PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,matname)); 962 PetscCall(MatView_SeqSBAIJ(((Mat_MPISBAIJ*)(A->data))->A,sviewer)); 963 } 964 PetscCall(PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer)); 965 PetscCall(PetscViewerFlush(viewer)); 966 PetscCall(MatDestroy(&A)); 967 } 968 PetscFunctionReturn(0); 969 } 970 971 /* Used for both MPIBAIJ and MPISBAIJ matrices */ 972 #define MatView_MPISBAIJ_Binary MatView_MPIBAIJ_Binary 973 974 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer) 975 { 976 PetscBool iascii,isdraw,issocket,isbinary; 977 978 PetscFunctionBegin; 979 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii)); 980 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw)); 981 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket)); 982 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary)); 983 if (iascii || isdraw || issocket) { 984 PetscCall(MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer)); 985 } else if (isbinary) PetscCall(MatView_MPISBAIJ_Binary(mat,viewer)); 986 PetscFunctionReturn(0); 987 } 988 989 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat) 990 { 991 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 992 993 
  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT,mat->rmap->N,mat->cmap->N);
#endif
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCall(MatStashDestroy_Private(&mat->bstash));
  PetscCall(MatDestroy(&baij->A));
  PetscCall(MatDestroy(&baij->B));
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscTableDestroy(&baij->colmap));
#else
  PetscCall(PetscFree(baij->colmap));
#endif
  PetscCall(PetscFree(baij->garray));
  PetscCall(VecDestroy(&baij->lvec));
  PetscCall(VecScatterDestroy(&baij->Mvctx));
  PetscCall(VecDestroy(&baij->slvec0));
  PetscCall(VecDestroy(&baij->slvec0b));
  PetscCall(VecDestroy(&baij->slvec1));
  PetscCall(VecDestroy(&baij->slvec1a));
  PetscCall(VecDestroy(&baij->slvec1b));
  PetscCall(VecScatterDestroy(&baij->sMvctx));
  PetscCall(PetscFree2(baij->rowvalues,baij->rowindices));
  PetscCall(PetscFree(baij->barray));
  PetscCall(PetscFree(baij->hd));
  PetscCall(VecDestroy(&baij->diag));
  PetscCall(VecDestroy(&baij->bb1));
  PetscCall(VecDestroy(&baij->xx1));
#if defined(PETSC_USE_REAL_MAT_SINGLE)
  PetscCall(PetscFree(baij->setvaluescopy));
#endif
  PetscCall(PetscFree(baij->in_loc));
  PetscCall(PetscFree(baij->v_loc));
  PetscCall(PetscFree(baij->rangebs));
  PetscCall(PetscFree(mat->data));

  PetscCall(PetscObjectChangeTypeName((PetscObject)mat,NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocationCSR_C",NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_elemental_C",NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_scalapack_C",NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpiaij_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpibaij_C",NULL));
  PetscFunctionReturn(0);
}
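/*
   Note on the MatMult kernels below (explanatory sketch, not an API statement): only the upper
   triangular blocks are stored, so the product y = A*x is assembled in pieces,

      y_local = A_diag*x_local + B*x_ghost + (contributions of B^T*x_local owned by other ranks).

   slvec0 holds [x_local; B^T*x_local] (B^H*x_local in the Hermitian variants), and the scatter
   sMvctx both ships the B^T pieces to their owning ranks and brings in the ghost values of x,
   accumulating with ADD_VALUES into slvec1 = [slvec1a; slvec1b].
*/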
PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
  PetscInt          mbs = a->mbs,bs = A->rmap->bs;
  PetscScalar       *from;
  const PetscScalar *x;

  PetscFunctionBegin;
  /* diagonal part */
  PetscCall((*a->A->ops->mult)(a->A,xx,a->slvec1a));
  PetscCall(VecSet(a->slvec1b,0.0));

  /* subdiagonal part */
  PetscCheck(a->B->ops->multhermitiantranspose,PetscObjectComm((PetscObject)a->B),PETSC_ERR_SUP,"Not for type %s",((PetscObject)a->B)->type_name);
  PetscCall((*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b));

  /* copy x into the vec slvec0 */
  PetscCall(VecGetArray(a->slvec0,&from));
  PetscCall(VecGetArrayRead(xx,&x));

  PetscCall(PetscArraycpy(from,x,bs*mbs));
  PetscCall(VecRestoreArray(a->slvec0,&from));
  PetscCall(VecRestoreArrayRead(xx,&x));

  PetscCall(VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD));
  PetscCall(VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD));
  /* superdiagonal part */
  PetscCall((*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy));
  PetscFunctionReturn(0);
}

PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
  PetscInt          mbs = a->mbs,bs = A->rmap->bs;
  PetscScalar       *from;
  const PetscScalar *x;

  PetscFunctionBegin;
  /* diagonal part */
  PetscCall((*a->A->ops->mult)(a->A,xx,a->slvec1a));
  PetscCall(VecSet(a->slvec1b,0.0));

  /* subdiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B,xx,a->slvec0b));

  /* copy x into the vec slvec0 */
  PetscCall(VecGetArray(a->slvec0,&from));
  PetscCall(VecGetArrayRead(xx,&x));

  PetscCall(PetscArraycpy(from,x,bs*mbs));
  PetscCall(VecRestoreArray(a->slvec0,&from));
  PetscCall(VecRestoreArrayRead(xx,&x));

  PetscCall(VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD));
  PetscCall(VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD));
  /* superdiagonal part */
  PetscCall((*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy));
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultAdd_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
  PetscInt          mbs = a->mbs,bs = A->rmap->bs;
  PetscScalar       *from,zero = 0.0;
  const PetscScalar *x;

  PetscFunctionBegin;
  /* diagonal part */
  PetscCall((*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a));
  PetscCall(VecSet(a->slvec1b,zero));

  /* subdiagonal part */
  PetscCheck(a->B->ops->multhermitiantranspose,PetscObjectComm((PetscObject)a->B),PETSC_ERR_SUP,"Not for type %s",((PetscObject)a->B)->type_name);
  PetscCall((*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b));

  /* copy x into the vec slvec0 */
  PetscCall(VecGetArray(a->slvec0,&from));
  PetscCall(VecGetArrayRead(xx,&x));
  PetscCall(PetscArraycpy(from,x,bs*mbs));
  PetscCall(VecRestoreArray(a->slvec0,&from));

  PetscCall(VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD));
  PetscCall(VecRestoreArrayRead(xx,&x));
  PetscCall(VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD));

  /* superdiagonal part */
  PetscCall((*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz));
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
  PetscInt          mbs = a->mbs,bs = A->rmap->bs;
  PetscScalar       *from,zero = 0.0;
  const PetscScalar *x;

  PetscFunctionBegin;
  /* diagonal part */
  PetscCall((*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a));
  PetscCall(VecSet(a->slvec1b,zero));

  /* subdiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B,xx,a->slvec0b));

  /* copy x into the vec slvec0 */
  PetscCall(VecGetArray(a->slvec0,&from));
  PetscCall(VecGetArrayRead(xx,&x));
  PetscCall(PetscArraycpy(from,x,bs*mbs));
  PetscCall(VecRestoreArray(a->slvec0,&from));

  PetscCall(VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD));
  PetscCall(VecRestoreArrayRead(xx,&x));
  PetscCall(VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD));

  /* superdiagonal part */
  PetscCall((*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz));
  PetscFunctionReturn(0);
}

/*
   This only works correctly for square matrices where
the subblock A->A is the 1169 diagonal block 1170 */ 1171 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v) 1172 { 1173 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1174 1175 PetscFunctionBegin; 1176 /* PetscCheck(a->rmap->N == a->cmap->N,PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */ 1177 PetscCall(MatGetDiagonal(a->A,v)); 1178 PetscFunctionReturn(0); 1179 } 1180 1181 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa) 1182 { 1183 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1184 1185 PetscFunctionBegin; 1186 PetscCall(MatScale(a->A,aa)); 1187 PetscCall(MatScale(a->B,aa)); 1188 PetscFunctionReturn(0); 1189 } 1190 1191 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1192 { 1193 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 1194 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1195 PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1196 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend; 1197 PetscInt *cmap,*idx_p,cstart = mat->rstartbs; 1198 1199 PetscFunctionBegin; 1200 PetscCheck(!mat->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1201 mat->getrowactive = PETSC_TRUE; 1202 1203 if (!mat->rowvalues && (idx || v)) { 1204 /* 1205 allocate enough space to hold information from the longest row. 1206 */ 1207 Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data; 1208 Mat_SeqBAIJ *Ba = (Mat_SeqBAIJ*)mat->B->data; 1209 PetscInt max = 1,mbs = mat->mbs,tmp; 1210 for (i=0; i<mbs; i++) { 1211 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */ 1212 if (max < tmp) max = tmp; 1213 } 1214 PetscCall(PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices)); 1215 } 1216 1217 PetscCheck(row >= brstart && row < brend,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows"); 1218 lrow = row - brstart; /* local row index */ 1219 1220 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1221 if (!v) {pvA = NULL; pvB = NULL;} 1222 if (!idx) {pcA = NULL; if (!v) pcB = NULL;} 1223 PetscCall((*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA)); 1224 PetscCall((*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB)); 1225 nztot = nzA + nzB; 1226 1227 cmap = mat->garray; 1228 if (v || idx) { 1229 if (nztot) { 1230 /* Sort by increasing column numbers, assuming A and B already sorted */ 1231 PetscInt imark = -1; 1232 if (v) { 1233 *v = v_p = mat->rowvalues; 1234 for (i=0; i<nzB; i++) { 1235 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1236 else break; 1237 } 1238 imark = i; 1239 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1240 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1241 } 1242 if (idx) { 1243 *idx = idx_p = mat->rowindices; 1244 if (imark > -1) { 1245 for (i=0; i<imark; i++) { 1246 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1247 } 1248 } else { 1249 for (i=0; i<nzB; i++) { 1250 if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1251 else break; 1252 } 1253 imark = i; 1254 } 1255 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1256 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1257 } 1258 } else { 1259 if (idx) *idx = NULL; 1260 if (v) *v = NULL; 1261 } 1262 } 1263 *nz = nztot; 1264 PetscCall((*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA)); 1265 PetscCall((*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB)); 1266 PetscFunctionReturn(0); 1267 } 1268 1269 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt 
*nz,PetscInt **idx,PetscScalar **v) 1270 { 1271 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1272 1273 PetscFunctionBegin; 1274 PetscCheck(baij->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1275 baij->getrowactive = PETSC_FALSE; 1276 PetscFunctionReturn(0); 1277 } 1278 1279 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A) 1280 { 1281 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1282 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1283 1284 PetscFunctionBegin; 1285 aA->getrow_utriangular = PETSC_TRUE; 1286 PetscFunctionReturn(0); 1287 } 1288 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A) 1289 { 1290 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1291 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1292 1293 PetscFunctionBegin; 1294 aA->getrow_utriangular = PETSC_FALSE; 1295 PetscFunctionReturn(0); 1296 } 1297 1298 PetscErrorCode MatConjugate_MPISBAIJ(Mat mat) 1299 { 1300 PetscFunctionBegin; 1301 if (PetscDefined(USE_COMPLEX)) { 1302 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)mat->data; 1303 1304 PetscCall(MatConjugate(a->A)); 1305 PetscCall(MatConjugate(a->B)); 1306 } 1307 PetscFunctionReturn(0); 1308 } 1309 1310 PetscErrorCode MatRealPart_MPISBAIJ(Mat A) 1311 { 1312 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1313 1314 PetscFunctionBegin; 1315 PetscCall(MatRealPart(a->A)); 1316 PetscCall(MatRealPart(a->B)); 1317 PetscFunctionReturn(0); 1318 } 1319 1320 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A) 1321 { 1322 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1323 1324 PetscFunctionBegin; 1325 PetscCall(MatImaginaryPart(a->A)); 1326 PetscCall(MatImaginaryPart(a->B)); 1327 PetscFunctionReturn(0); 1328 } 1329 1330 /* Check if isrow is a subset of iscol_local, called by MatCreateSubMatrix_MPISBAIJ() 1331 Input: isrow - distributed(parallel), 1332 iscol_local - locally owned (seq) 1333 */ 1334 PetscErrorCode ISEqual_private(IS isrow,IS iscol_local,PetscBool *flg) 1335 { 1336 PetscInt sz1,sz2,*a1,*a2,i,j,k,nmatch; 1337 const PetscInt *ptr1,*ptr2; 1338 1339 PetscFunctionBegin; 1340 PetscCall(ISGetLocalSize(isrow,&sz1)); 1341 PetscCall(ISGetLocalSize(iscol_local,&sz2)); 1342 if (sz1 > sz2) { 1343 *flg = PETSC_FALSE; 1344 PetscFunctionReturn(0); 1345 } 1346 1347 PetscCall(ISGetIndices(isrow,&ptr1)); 1348 PetscCall(ISGetIndices(iscol_local,&ptr2)); 1349 1350 PetscCall(PetscMalloc1(sz1,&a1)); 1351 PetscCall(PetscMalloc1(sz2,&a2)); 1352 PetscCall(PetscArraycpy(a1,ptr1,sz1)); 1353 PetscCall(PetscArraycpy(a2,ptr2,sz2)); 1354 PetscCall(PetscSortInt(sz1,a1)); 1355 PetscCall(PetscSortInt(sz2,a2)); 1356 1357 nmatch=0; 1358 k = 0; 1359 for (i=0; i<sz1; i++) { 1360 for (j=k; j<sz2; j++) { 1361 if (a1[i] == a2[j]) { 1362 k = j; nmatch++; 1363 break; 1364 } 1365 } 1366 } 1367 PetscCall(ISRestoreIndices(isrow,&ptr1)); 1368 PetscCall(ISRestoreIndices(iscol_local,&ptr2)); 1369 PetscCall(PetscFree(a1)); 1370 PetscCall(PetscFree(a2)); 1371 if (nmatch < sz1) { 1372 *flg = PETSC_FALSE; 1373 } else { 1374 *flg = PETSC_TRUE; 1375 } 1376 PetscFunctionReturn(0); 1377 } 1378 1379 PetscErrorCode MatCreateSubMatrix_MPISBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 1380 { 1381 IS iscol_local; 1382 PetscInt csize; 1383 PetscBool isequal; 1384 1385 PetscFunctionBegin; 1386 PetscCall(ISGetLocalSize(iscol,&csize)); 1387 if (call == MAT_REUSE_MATRIX) { 1388 PetscCall(PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local)); 1389 PetscCheck(iscol_local,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 1390 } 
else { 1391 PetscBool issorted; 1392 1393 PetscCall(ISAllGather(iscol,&iscol_local)); 1394 PetscCall(ISEqual_private(isrow,iscol_local,&isequal)); 1395 PetscCall(ISSorted(iscol_local, &issorted)); 1396 PetscCheck(isequal && issorted,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isrow and be sorted"); 1397 } 1398 1399 /* now call MatCreateSubMatrix_MPIBAIJ() */ 1400 PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat)); 1401 if (call == MAT_INITIAL_MATRIX) { 1402 PetscCall(PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local)); 1403 PetscCall(ISDestroy(&iscol_local)); 1404 } 1405 PetscFunctionReturn(0); 1406 } 1407 1408 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A) 1409 { 1410 Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data; 1411 1412 PetscFunctionBegin; 1413 PetscCall(MatZeroEntries(l->A)); 1414 PetscCall(MatZeroEntries(l->B)); 1415 PetscFunctionReturn(0); 1416 } 1417 1418 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1419 { 1420 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data; 1421 Mat A = a->A,B = a->B; 1422 PetscLogDouble isend[5],irecv[5]; 1423 1424 PetscFunctionBegin; 1425 info->block_size = (PetscReal)matin->rmap->bs; 1426 1427 PetscCall(MatGetInfo(A,MAT_LOCAL,info)); 1428 1429 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1430 isend[3] = info->memory; isend[4] = info->mallocs; 1431 1432 PetscCall(MatGetInfo(B,MAT_LOCAL,info)); 1433 1434 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1435 isend[3] += info->memory; isend[4] += info->mallocs; 1436 if (flag == MAT_LOCAL) { 1437 info->nz_used = isend[0]; 1438 info->nz_allocated = isend[1]; 1439 info->nz_unneeded = isend[2]; 1440 info->memory = isend[3]; 1441 info->mallocs = isend[4]; 1442 } else if (flag == MAT_GLOBAL_MAX) { 1443 PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin))); 1444 1445 info->nz_used = irecv[0]; 1446 info->nz_allocated = irecv[1]; 1447 info->nz_unneeded = irecv[2]; 1448 info->memory = irecv[3]; 1449 info->mallocs = irecv[4]; 1450 } else if (flag == MAT_GLOBAL_SUM) { 1451 PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin))); 1452 1453 info->nz_used = irecv[0]; 1454 info->nz_allocated = irecv[1]; 1455 info->nz_unneeded = irecv[2]; 1456 info->memory = irecv[3]; 1457 info->mallocs = irecv[4]; 1458 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1459 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1460 info->fill_ratio_needed = 0; 1461 info->factor_mallocs = 0; 1462 PetscFunctionReturn(0); 1463 } 1464 1465 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg) 1466 { 1467 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1468 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1469 1470 PetscFunctionBegin; 1471 switch (op) { 1472 case MAT_NEW_NONZERO_LOCATIONS: 1473 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1474 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1475 case MAT_KEEP_NONZERO_PATTERN: 1476 case MAT_SUBMAT_SINGLEIS: 1477 case MAT_NEW_NONZERO_LOCATION_ERR: 1478 MatCheckPreallocated(A,1); 1479 PetscCall(MatSetOption(a->A,op,flg)); 1480 PetscCall(MatSetOption(a->B,op,flg)); 1481 break; 1482 case MAT_ROW_ORIENTED: 1483 MatCheckPreallocated(A,1); 1484 a->roworiented = flg; 1485 1486 PetscCall(MatSetOption(a->A,op,flg)); 1487 PetscCall(MatSetOption(a->B,op,flg)); 1488 break; 
1489 case MAT_FORCE_DIAGONAL_ENTRIES: 1490 case MAT_SORTED_FULL: 1491 PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op])); 1492 break; 1493 case MAT_IGNORE_OFF_PROC_ENTRIES: 1494 a->donotstash = flg; 1495 break; 1496 case MAT_USE_HASH_TABLE: 1497 a->ht_flag = flg; 1498 break; 1499 case MAT_HERMITIAN: 1500 MatCheckPreallocated(A,1); 1501 PetscCall(MatSetOption(a->A,op,flg)); 1502 #if defined(PETSC_USE_COMPLEX) 1503 if (flg) { /* need different mat-vec ops */ 1504 A->ops->mult = MatMult_MPISBAIJ_Hermitian; 1505 A->ops->multadd = MatMultAdd_MPISBAIJ_Hermitian; 1506 A->ops->multtranspose = NULL; 1507 A->ops->multtransposeadd = NULL; 1508 A->symmetric = PETSC_FALSE; 1509 } 1510 #endif 1511 break; 1512 case MAT_SPD: 1513 case MAT_SYMMETRIC: 1514 MatCheckPreallocated(A,1); 1515 PetscCall(MatSetOption(a->A,op,flg)); 1516 #if defined(PETSC_USE_COMPLEX) 1517 if (flg) { /* restore to use default mat-vec ops */ 1518 A->ops->mult = MatMult_MPISBAIJ; 1519 A->ops->multadd = MatMultAdd_MPISBAIJ; 1520 A->ops->multtranspose = MatMult_MPISBAIJ; 1521 A->ops->multtransposeadd = MatMultAdd_MPISBAIJ; 1522 } 1523 #endif 1524 break; 1525 case MAT_STRUCTURALLY_SYMMETRIC: 1526 MatCheckPreallocated(A,1); 1527 PetscCall(MatSetOption(a->A,op,flg)); 1528 break; 1529 case MAT_SYMMETRY_ETERNAL: 1530 PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric"); 1531 PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op])); 1532 break; 1533 case MAT_IGNORE_LOWER_TRIANGULAR: 1534 aA->ignore_ltriangular = flg; 1535 break; 1536 case MAT_ERROR_LOWER_TRIANGULAR: 1537 aA->ignore_ltriangular = flg; 1538 break; 1539 case MAT_GETROW_UPPERTRIANGULAR: 1540 aA->getrow_utriangular = flg; 1541 break; 1542 default: 1543 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1544 } 1545 PetscFunctionReturn(0); 1546 } 1547 1548 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B) 1549 { 1550 PetscFunctionBegin; 1551 if (reuse == MAT_INITIAL_MATRIX) { 1552 PetscCall(MatDuplicate(A,MAT_COPY_VALUES,B)); 1553 } else if (reuse == MAT_REUSE_MATRIX) { 1554 PetscCall(MatCopy(A,*B,SAME_NONZERO_PATTERN)); 1555 } 1556 PetscFunctionReturn(0); 1557 } 1558 1559 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr) 1560 { 1561 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1562 Mat a = baij->A, b=baij->B; 1563 PetscInt nv,m,n; 1564 PetscBool flg; 1565 1566 PetscFunctionBegin; 1567 if (ll != rr) { 1568 PetscCall(VecEqual(ll,rr,&flg)); 1569 PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same"); 1570 } 1571 if (!ll) PetscFunctionReturn(0); 1572 1573 PetscCall(MatGetLocalSize(mat,&m,&n)); 1574 PetscCheck(m == n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local size %" PetscInt_FMT " %" PetscInt_FMT " must be same",m,n); 1575 1576 PetscCall(VecGetLocalSize(rr,&nv)); 1577 PetscCheck(nv==n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size"); 1578 1579 PetscCall(VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1580 1581 /* left diagonalscale the off-diagonal part */ 1582 PetscCall((*b->ops->diagonalscale)(b,ll,NULL)); 1583 1584 /* scale the diagonal part */ 1585 PetscCall((*a->ops->diagonalscale)(a,ll,rr)); 1586 1587 /* right diagonalscale the off-diagonal part */ 1588 PetscCall(VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1589 PetscCall((*b->ops->diagonalscale)(b,NULL,baij->lvec)); 1590 PetscFunctionReturn(0); 1591 } 1592 1593 
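/*
   Usage sketch for symmetric diagonal scaling (illustration only; "mat" and "d" are
   hypothetical): since MatDiagonalScale_MPISBAIJ above requires ll == rr to preserve symmetry,
   a Jacobi-style scaling D^{-1/2} A D^{-1/2} can be applied as

     Vec d;
     PetscCall(MatCreateVecs(mat,&d,NULL));
     PetscCall(MatGetDiagonal(mat,d));
     PetscCall(VecSqrtAbs(d));
     PetscCall(VecReciprocal(d));
     PetscCall(MatDiagonalScale(mat,d,d));
     PetscCall(VecDestroy(&d));
*/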
PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A) 1594 { 1595 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1596 1597 PetscFunctionBegin; 1598 PetscCall(MatSetUnfactored(a->A)); 1599 PetscFunctionReturn(0); 1600 } 1601 1602 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*); 1603 1604 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool *flag) 1605 { 1606 Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data; 1607 Mat a,b,c,d; 1608 PetscBool flg; 1609 1610 PetscFunctionBegin; 1611 a = matA->A; b = matA->B; 1612 c = matB->A; d = matB->B; 1613 1614 PetscCall(MatEqual(a,c,&flg)); 1615 if (flg) { 1616 PetscCall(MatEqual(b,d,&flg)); 1617 } 1618 PetscCall(MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A))); 1619 PetscFunctionReturn(0); 1620 } 1621 1622 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str) 1623 { 1624 PetscBool isbaij; 1625 1626 PetscFunctionBegin; 1627 PetscCall(PetscObjectTypeCompareAny((PetscObject)B,&isbaij,MATSEQSBAIJ,MATMPISBAIJ,"")); 1628 PetscCheck(isbaij,PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"Not for matrix type %s",((PetscObject)B)->type_name); 1629 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1630 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1631 PetscCall(MatGetRowUpperTriangular(A)); 1632 PetscCall(MatCopy_Basic(A,B,str)); 1633 PetscCall(MatRestoreRowUpperTriangular(A)); 1634 } else { 1635 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1636 Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)B->data; 1637 1638 PetscCall(MatCopy(a->A,b->A,str)); 1639 PetscCall(MatCopy(a->B,b->B,str)); 1640 } 1641 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 1642 PetscFunctionReturn(0); 1643 } 1644 1645 PetscErrorCode MatSetUp_MPISBAIJ(Mat A) 1646 { 1647 PetscFunctionBegin; 1648 PetscCall(MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL)); 1649 PetscFunctionReturn(0); 1650 } 1651 1652 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1653 { 1654 Mat_MPISBAIJ *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data; 1655 PetscBLASInt bnz,one=1; 1656 Mat_SeqSBAIJ *xa,*ya; 1657 Mat_SeqBAIJ *xb,*yb; 1658 1659 PetscFunctionBegin; 1660 if (str == SAME_NONZERO_PATTERN) { 1661 PetscScalar alpha = a; 1662 xa = (Mat_SeqSBAIJ*)xx->A->data; 1663 ya = (Mat_SeqSBAIJ*)yy->A->data; 1664 PetscCall(PetscBLASIntCast(xa->nz,&bnz)); 1665 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one)); 1666 xb = (Mat_SeqBAIJ*)xx->B->data; 1667 yb = (Mat_SeqBAIJ*)yy->B->data; 1668 PetscCall(PetscBLASIntCast(xb->nz,&bnz)); 1669 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one)); 1670 PetscCall(PetscObjectStateIncrease((PetscObject)Y)); 1671 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1672 PetscCall(MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE)); 1673 PetscCall(MatAXPY_Basic(Y,a,X,str)); 1674 PetscCall(MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE)); 1675 } else { 1676 Mat B; 1677 PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs; 1678 PetscCheck(bs == X->rmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size"); 1679 PetscCall(MatGetRowUpperTriangular(X)); 1680 PetscCall(MatGetRowUpperTriangular(Y)); 1681 PetscCall(PetscMalloc1(yy->A->rmap->N,&nnz_d)); 1682 PetscCall(PetscMalloc1(yy->B->rmap->N,&nnz_o)); 1683 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y),&B)); 1684 
PetscCall(PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name)); 1685 PetscCall(MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N)); 1686 PetscCall(MatSetBlockSizesFromMats(B,Y,Y)); 1687 PetscCall(MatSetType(B,MATMPISBAIJ)); 1688 PetscCall(MatAXPYGetPreallocation_SeqSBAIJ(yy->A,xx->A,nnz_d)); 1689 PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o)); 1690 PetscCall(MatMPISBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o)); 1691 PetscCall(MatAXPY_BasicWithPreallocation(B,Y,a,X,str)); 1692 PetscCall(MatHeaderMerge(Y,&B)); 1693 PetscCall(PetscFree(nnz_d)); 1694 PetscCall(PetscFree(nnz_o)); 1695 PetscCall(MatRestoreRowUpperTriangular(X)); 1696 PetscCall(MatRestoreRowUpperTriangular(Y)); 1697 } 1698 PetscFunctionReturn(0); 1699 } 1700 1701 PetscErrorCode MatCreateSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[]) 1702 { 1703 PetscInt i; 1704 PetscBool flg; 1705 1706 PetscFunctionBegin; 1707 PetscCall(MatCreateSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B)); /* B[] are sbaij matrices */ 1708 for (i=0; i<n; i++) { 1709 PetscCall(ISEqual(irow[i],icol[i],&flg)); 1710 if (!flg) { 1711 PetscCall(MatSeqSBAIJZeroOps_Private(*B[i])); 1712 } 1713 } 1714 PetscFunctionReturn(0); 1715 } 1716 1717 PetscErrorCode MatShift_MPISBAIJ(Mat Y,PetscScalar a) 1718 { 1719 Mat_MPISBAIJ *maij = (Mat_MPISBAIJ*)Y->data; 1720 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ*)maij->A->data; 1721 1722 PetscFunctionBegin; 1723 if (!Y->preallocated) { 1724 PetscCall(MatMPISBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL)); 1725 } else if (!aij->nz) { 1726 PetscInt nonew = aij->nonew; 1727 PetscCall(MatSeqSBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL)); 1728 aij->nonew = nonew; 1729 } 1730 PetscCall(MatShift_Basic(Y,a)); 1731 PetscFunctionReturn(0); 1732 } 1733 1734 PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A,PetscBool *missing,PetscInt *d) 1735 { 1736 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1737 1738 PetscFunctionBegin; 1739 PetscCheck(A->rmap->n == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 1740 PetscCall(MatMissingDiagonal(a->A,missing,d)); 1741 if (d) { 1742 PetscInt rstart; 1743 PetscCall(MatGetOwnershipRange(A,&rstart,NULL)); 1744 *d += rstart/A->rmap->bs; 1745 1746 } 1747 PetscFunctionReturn(0); 1748 } 1749 1750 PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a) 1751 { 1752 PetscFunctionBegin; 1753 *a = ((Mat_MPISBAIJ*)A->data)->A; 1754 PetscFunctionReturn(0); 1755 } 1756 1757 /* -------------------------------------------------------------------*/ 1758 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ, 1759 MatGetRow_MPISBAIJ, 1760 MatRestoreRow_MPISBAIJ, 1761 MatMult_MPISBAIJ, 1762 /* 4*/ MatMultAdd_MPISBAIJ, 1763 MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */ 1764 MatMultAdd_MPISBAIJ, 1765 NULL, 1766 NULL, 1767 NULL, 1768 /* 10*/ NULL, 1769 NULL, 1770 NULL, 1771 MatSOR_MPISBAIJ, 1772 MatTranspose_MPISBAIJ, 1773 /* 15*/ MatGetInfo_MPISBAIJ, 1774 MatEqual_MPISBAIJ, 1775 MatGetDiagonal_MPISBAIJ, 1776 MatDiagonalScale_MPISBAIJ, 1777 MatNorm_MPISBAIJ, 1778 /* 20*/ MatAssemblyBegin_MPISBAIJ, 1779 MatAssemblyEnd_MPISBAIJ, 1780 MatSetOption_MPISBAIJ, 1781 MatZeroEntries_MPISBAIJ, 1782 /* 24*/ NULL, 1783 NULL, 1784 NULL, 1785 NULL, 1786 NULL, 1787 /* 29*/ MatSetUp_MPISBAIJ, 1788 NULL, 1789 NULL, 1790 MatGetDiagonalBlock_MPISBAIJ, 1791 NULL, 1792 /* 34*/ MatDuplicate_MPISBAIJ, 1793 NULL, 1794 NULL, 1795 NULL, 1796 NULL, 1797 /* 39*/ MatAXPY_MPISBAIJ, 1798 
MatCreateSubMatrices_MPISBAIJ, 1799 MatIncreaseOverlap_MPISBAIJ, 1800 MatGetValues_MPISBAIJ, 1801 MatCopy_MPISBAIJ, 1802 /* 44*/ NULL, 1803 MatScale_MPISBAIJ, 1804 MatShift_MPISBAIJ, 1805 NULL, 1806 NULL, 1807 /* 49*/ NULL, 1808 NULL, 1809 NULL, 1810 NULL, 1811 NULL, 1812 /* 54*/ NULL, 1813 NULL, 1814 MatSetUnfactored_MPISBAIJ, 1815 NULL, 1816 MatSetValuesBlocked_MPISBAIJ, 1817 /* 59*/ MatCreateSubMatrix_MPISBAIJ, 1818 NULL, 1819 NULL, 1820 NULL, 1821 NULL, 1822 /* 64*/ NULL, 1823 NULL, 1824 NULL, 1825 NULL, 1826 NULL, 1827 /* 69*/ MatGetRowMaxAbs_MPISBAIJ, 1828 NULL, 1829 MatConvert_MPISBAIJ_Basic, 1830 NULL, 1831 NULL, 1832 /* 74*/ NULL, 1833 NULL, 1834 NULL, 1835 NULL, 1836 NULL, 1837 /* 79*/ NULL, 1838 NULL, 1839 NULL, 1840 NULL, 1841 MatLoad_MPISBAIJ, 1842 /* 84*/ NULL, 1843 NULL, 1844 NULL, 1845 NULL, 1846 NULL, 1847 /* 89*/ NULL, 1848 NULL, 1849 NULL, 1850 NULL, 1851 NULL, 1852 /* 94*/ NULL, 1853 NULL, 1854 NULL, 1855 NULL, 1856 NULL, 1857 /* 99*/ NULL, 1858 NULL, 1859 NULL, 1860 MatConjugate_MPISBAIJ, 1861 NULL, 1862 /*104*/ NULL, 1863 MatRealPart_MPISBAIJ, 1864 MatImaginaryPart_MPISBAIJ, 1865 MatGetRowUpperTriangular_MPISBAIJ, 1866 MatRestoreRowUpperTriangular_MPISBAIJ, 1867 /*109*/ NULL, 1868 NULL, 1869 NULL, 1870 NULL, 1871 MatMissingDiagonal_MPISBAIJ, 1872 /*114*/ NULL, 1873 NULL, 1874 NULL, 1875 NULL, 1876 NULL, 1877 /*119*/ NULL, 1878 NULL, 1879 NULL, 1880 NULL, 1881 NULL, 1882 /*124*/ NULL, 1883 NULL, 1884 NULL, 1885 NULL, 1886 NULL, 1887 /*129*/ NULL, 1888 NULL, 1889 NULL, 1890 NULL, 1891 NULL, 1892 /*134*/ NULL, 1893 NULL, 1894 NULL, 1895 NULL, 1896 NULL, 1897 /*139*/ MatSetBlockSizes_Default, 1898 NULL, 1899 NULL, 1900 NULL, 1901 NULL, 1902 /*144*/MatCreateMPIMatConcatenateSeqMat_MPISBAIJ, 1903 NULL, 1904 NULL, 1905 NULL 1906 }; 1907 1908 PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz) 1909 { 1910 Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)B->data; 1911 PetscInt i,mbs,Mbs; 1912 PetscMPIInt size; 1913 1914 PetscFunctionBegin; 1915 PetscCall(MatSetBlockSize(B,PetscAbs(bs))); 1916 PetscCall(PetscLayoutSetUp(B->rmap)); 1917 PetscCall(PetscLayoutSetUp(B->cmap)); 1918 PetscCall(PetscLayoutGetBlockSize(B->rmap,&bs)); 1919 PetscCheck(B->rmap->N <= B->cmap->N,PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"MPISBAIJ matrix cannot have more rows %" PetscInt_FMT " than columns %" PetscInt_FMT,B->rmap->N,B->cmap->N); 1920 PetscCheck(B->rmap->n <= B->cmap->n,PETSC_COMM_SELF,PETSC_ERR_SUP,"MPISBAIJ matrix cannot have more local rows %" PetscInt_FMT " than columns %" PetscInt_FMT,B->rmap->n,B->cmap->n); 1921 1922 mbs = B->rmap->n/bs; 1923 Mbs = B->rmap->N/bs; 1924 PetscCheck(mbs*bs == B->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT,B->rmap->N,bs); 1925 1926 B->rmap->bs = bs; 1927 b->bs2 = bs*bs; 1928 b->mbs = mbs; 1929 b->Mbs = Mbs; 1930 b->nbs = B->cmap->n/bs; 1931 b->Nbs = B->cmap->N/bs; 1932 1933 for (i=0; i<=b->size; i++) { 1934 b->rangebs[i] = B->rmap->range[i]/bs; 1935 } 1936 b->rstartbs = B->rmap->rstart/bs; 1937 b->rendbs = B->rmap->rend/bs; 1938 1939 b->cstartbs = B->cmap->rstart/bs; 1940 b->cendbs = B->cmap->rend/bs; 1941 1942 #if defined(PETSC_USE_CTABLE) 1943 PetscCall(PetscTableDestroy(&b->colmap)); 1944 #else 1945 PetscCall(PetscFree(b->colmap)); 1946 #endif 1947 PetscCall(PetscFree(b->garray)); 1948 PetscCall(VecDestroy(&b->lvec)); 1949 PetscCall(VecScatterDestroy(&b->Mvctx)); 1950 
PetscCall(VecDestroy(&b->slvec0)); 1951 PetscCall(VecDestroy(&b->slvec0b)); 1952 PetscCall(VecDestroy(&b->slvec1)); 1953 PetscCall(VecDestroy(&b->slvec1a)); 1954 PetscCall(VecDestroy(&b->slvec1b)); 1955 PetscCall(VecScatterDestroy(&b->sMvctx)); 1956 1957 /* Because the B will have been resized we simply destroy it and create a new one each time */ 1958 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&size)); 1959 PetscCall(MatDestroy(&b->B)); 1960 PetscCall(MatCreate(PETSC_COMM_SELF,&b->B)); 1961 PetscCall(MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0)); 1962 PetscCall(MatSetType(b->B,MATSEQBAIJ)); 1963 PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)b->B)); 1964 1965 if (!B->preallocated) { 1966 PetscCall(MatCreate(PETSC_COMM_SELF,&b->A)); 1967 PetscCall(MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n)); 1968 PetscCall(MatSetType(b->A,MATSEQSBAIJ)); 1969 PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)b->A)); 1970 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash)); 1971 } 1972 1973 PetscCall(MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz)); 1974 PetscCall(MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz)); 1975 1976 B->preallocated = PETSC_TRUE; 1977 B->was_assembled = PETSC_FALSE; 1978 B->assembled = PETSC_FALSE; 1979 PetscFunctionReturn(0); 1980 } 1981 1982 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 1983 { 1984 PetscInt m,rstart,cend; 1985 PetscInt i,j,d,nz,bd, nz_max=0,*d_nnz=NULL,*o_nnz=NULL; 1986 const PetscInt *JJ =NULL; 1987 PetscScalar *values=NULL; 1988 PetscBool roworiented = ((Mat_MPISBAIJ*)B->data)->roworiented; 1989 PetscBool nooffprocentries; 1990 1991 PetscFunctionBegin; 1992 PetscCheck(bs >= 1,PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %" PetscInt_FMT,bs); 1993 PetscCall(PetscLayoutSetBlockSize(B->rmap,bs)); 1994 PetscCall(PetscLayoutSetBlockSize(B->cmap,bs)); 1995 PetscCall(PetscLayoutSetUp(B->rmap)); 1996 PetscCall(PetscLayoutSetUp(B->cmap)); 1997 PetscCall(PetscLayoutGetBlockSize(B->rmap,&bs)); 1998 m = B->rmap->n/bs; 1999 rstart = B->rmap->rstart/bs; 2000 cend = B->cmap->rend/bs; 2001 2002 PetscCheck(!ii[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %" PetscInt_FMT,ii[0]); 2003 PetscCall(PetscMalloc2(m,&d_nnz,m,&o_nnz)); 2004 for (i=0; i<m; i++) { 2005 nz = ii[i+1] - ii[i]; 2006 PetscCheck(nz >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT,i,nz); 2007 /* count the ones on the diagonal and above, split into diagonal and off diagonal portions. 
*/ 2008 JJ = jj + ii[i]; 2009 bd = 0; 2010 for (j=0; j<nz; j++) { 2011 if (*JJ >= i + rstart) break; 2012 JJ++; 2013 bd++; 2014 } 2015 d = 0; 2016 for (; j<nz; j++) { 2017 if (*JJ++ >= cend) break; 2018 d++; 2019 } 2020 d_nnz[i] = d; 2021 o_nnz[i] = nz - d - bd; 2022 nz = nz - bd; 2023 nz_max = PetscMax(nz_max,nz); 2024 } 2025 PetscCall(MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz)); 2026 PetscCall(MatSetOption(B,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)); 2027 PetscCall(PetscFree2(d_nnz,o_nnz)); 2028 2029 values = (PetscScalar*)V; 2030 if (!values) { 2031 PetscCall(PetscCalloc1(bs*bs*nz_max,&values)); 2032 } 2033 for (i=0; i<m; i++) { 2034 PetscInt row = i + rstart; 2035 PetscInt ncols = ii[i+1] - ii[i]; 2036 const PetscInt *icols = jj + ii[i]; 2037 if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */ 2038 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2039 PetscCall(MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES)); 2040 } else { /* block ordering does not match so we can only insert one block at a time. */ 2041 PetscInt j; 2042 for (j=0; j<ncols; j++) { 2043 const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0); 2044 PetscCall(MatSetValuesBlocked_MPISBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES)); 2045 } 2046 } 2047 } 2048 2049 if (!V) PetscCall(PetscFree(values)); 2050 nooffprocentries = B->nooffprocentries; 2051 B->nooffprocentries = PETSC_TRUE; 2052 PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY)); 2053 PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY)); 2054 B->nooffprocentries = nooffprocentries; 2055 2056 PetscCall(MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE)); 2057 PetscFunctionReturn(0); 2058 } 2059 2060 /*MC 2061 MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices, 2062 based on block compressed sparse row format. Only the upper triangular portion of the "diagonal" portion of 2063 the matrix is stored. 2064 2065 For complex numbers by default this matrix is symmetric, NOT Hermitian. To make it Hermitian you 2066 can call MatSetOption(Mat, MAT_HERMITIAN, PETSC_TRUE); 2067 2068 Options Database Keys: 2069 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions() 2070 2071 Notes: 2072 The number of rows in the matrix must be less than or equal to the number of columns. Similarly, the number of rows in the 2073 diagonal portion of the matrix on each process must be less than or equal to the number of columns.
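   Example (a minimal usage sketch; the matrix name A, the global size 100, the block size 2, and the preallocation estimates below are illustrative only, not prescribed by this type):
.vb
   Mat A;
   MatCreate(PETSC_COMM_WORLD, &A);
   MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, 100, 100);
   MatSetType(A, MATMPISBAIJ);
   MatMPISBAIJSetPreallocation(A, 2, 5, NULL, 2, NULL);   // bs = 2, estimated block nonzeros per block row
   MatSetOption(A, MAT_HERMITIAN, PETSC_TRUE);            // only meaningful with complex scalars
   // ... insert the upper triangular blocks with MatSetValuesBlocked(), then assemble ...
.ve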
2074 2075 Level: beginner 2076 2077 .seealso: `MatCreateBAIJ()`, `MATSEQSBAIJ`, `MatType` 2078 M*/ 2079 2080 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B) 2081 { 2082 Mat_MPISBAIJ *b; 2083 PetscBool flg = PETSC_FALSE; 2084 2085 PetscFunctionBegin; 2086 PetscCall(PetscNewLog(B,&b)); 2087 B->data = (void*)b; 2088 PetscCall(PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps))); 2089 2090 B->ops->destroy = MatDestroy_MPISBAIJ; 2091 B->ops->view = MatView_MPISBAIJ; 2092 B->assembled = PETSC_FALSE; 2093 B->insertmode = NOT_SET_VALUES; 2094 2095 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank)); 2096 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size)); 2097 2098 /* build local table of row and column ownerships */ 2099 PetscCall(PetscMalloc1(b->size+2,&b->rangebs)); 2100 2101 /* build cache for off array entries formed */ 2102 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash)); 2103 2104 b->donotstash = PETSC_FALSE; 2105 b->colmap = NULL; 2106 b->garray = NULL; 2107 b->roworiented = PETSC_TRUE; 2108 2109 /* stuff used in block assembly */ 2110 b->barray = NULL; 2111 2112 /* stuff used for matrix vector multiply */ 2113 b->lvec = NULL; 2114 b->Mvctx = NULL; 2115 b->slvec0 = NULL; 2116 b->slvec0b = NULL; 2117 b->slvec1 = NULL; 2118 b->slvec1a = NULL; 2119 b->slvec1b = NULL; 2120 b->sMvctx = NULL; 2121 2122 /* stuff for MatGetRow() */ 2123 b->rowindices = NULL; 2124 b->rowvalues = NULL; 2125 b->getrowactive = PETSC_FALSE; 2126 2127 /* hash table stuff */ 2128 b->ht = NULL; 2129 b->hd = NULL; 2130 b->ht_size = 0; 2131 b->ht_flag = PETSC_FALSE; 2132 b->ht_fact = 0; 2133 b->ht_total_ct = 0; 2134 b->ht_insert_ct = 0; 2135 2136 /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */ 2137 b->ijonly = PETSC_FALSE; 2138 2139 b->in_loc = NULL; 2140 b->v_loc = NULL; 2141 b->n_loc = 0; 2142 2143 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ)); 2144 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ)); 2145 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ)); 2146 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ)); 2147 #if defined(PETSC_HAVE_ELEMENTAL) 2148 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_elemental_C",MatConvert_MPISBAIJ_Elemental)); 2149 #endif 2150 #if defined(PETSC_HAVE_SCALAPACK) 2151 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_scalapack_C",MatConvert_SBAIJ_ScaLAPACK)); 2152 #endif 2153 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpiaij_C",MatConvert_MPISBAIJ_Basic)); 2154 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpibaij_C",MatConvert_MPISBAIJ_Basic)); 2155 2156 B->symmetric = PETSC_TRUE; 2157 B->structurally_symmetric = PETSC_TRUE; 2158 B->symmetric_set = PETSC_TRUE; 2159 B->structurally_symmetric_set = PETSC_TRUE; 2160 B->symmetric_eternal = PETSC_TRUE; 2161 #if defined(PETSC_USE_COMPLEX) 2162 B->hermitian = PETSC_FALSE; 2163 B->hermitian_set = PETSC_FALSE; 2164 #else 2165 B->hermitian = PETSC_TRUE; 2166 B->hermitian_set = PETSC_TRUE; 2167 #endif 2168 2169 PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ)); 2170 PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix 1","Mat"); 2171 
PetscCall(PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",flg,&flg,NULL)); 2172 if (flg) { 2173 PetscReal fact = 1.39; 2174 PetscCall(MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE)); 2175 PetscCall(PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL)); 2176 if (fact <= 1.0) fact = 1.39; 2177 PetscCall(MatMPIBAIJSetHashTableFactor(B,fact)); 2178 PetscCall(PetscInfo(B,"Hash table Factor used %5.2g\n",(double)fact)); 2179 } 2180 PetscOptionsEnd(); 2181 PetscFunctionReturn(0); 2182 } 2183 2184 /*MC 2185 MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices. 2186 2187 This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator, 2188 and MATMPISBAIJ otherwise. 2189 2190 Options Database Keys: 2191 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions() 2192 2193 Level: beginner 2194 2195 .seealso: `MatCreateSBAIJ`, `MATSEQSBAIJ`, `MATMPISBAIJ` 2196 M*/ 2197 2198 /*@C 2199 MatMPISBAIJSetPreallocation - For good matrix assembly performance 2200 the user should preallocate the matrix storage by setting the parameters 2201 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2202 performance can be increased by more than a factor of 50. 2203 2204 Collective on Mat 2205 2206 Input Parameters: 2207 + B - the matrix 2208 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 2209 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 2210 . d_nz - number of block nonzeros per block row in diagonal portion of local 2211 submatrix (same for all local rows) 2212 . d_nnz - array containing the number of block nonzeros in the various block rows 2213 in the upper triangular and diagonal part of the diagonal portion of the local submatrix 2214 (possibly different for each block row) or NULL. If you plan to factor the matrix you must leave room 2215 for the diagonal entry and set a value even if it is zero. 2216 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2217 submatrix (same for all local rows). 2218 - o_nnz - array containing the number of nonzeros in the various block rows of the 2219 off-diagonal portion of the local submatrix that is right of the diagonal 2220 (possibly different for each block row) or NULL. 2221 2222 Options Database Keys: 2223 + -mat_no_unroll - uses code that does not unroll the loops in the 2224 block calculations (much slower) 2225 - -mat_block_size - size of the blocks to use 2226 2227 Notes: 2228 2229 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2230 then it must be used on all processors that share the object for that argument. 2231 2232 If the *_nnz parameter is given then the *_nz parameter is ignored. 2233 2234 Storage Information: 2235 For a square global matrix we define each processor's diagonal portion 2236 to be its local rows and the corresponding columns (a square submatrix); 2237 each processor's off-diagonal portion encompasses the remainder of the 2238 local matrix (a rectangular submatrix). 2239 2240 The user can specify preallocated storage for the diagonal part of 2241 the local submatrix with either d_nz or d_nnz (not both).
Set 2242 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 2243 memory allocation. Likewise, specify preallocated storage for the 2244 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2245 2246 You can call MatGetInfo() to get information on how effective the preallocation was; 2247 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 2248 You can also run with the option -info and look for messages with the string 2249 malloc in them to see if additional memory allocation was needed. 2250 2251 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2252 the figure below we depict these three local rows and all columns (0-11). 2253 2254 .vb 2255 0 1 2 3 4 5 6 7 8 9 10 11 2256 -------------------------- 2257 row 3 |. . . d d d o o o o o o 2258 row 4 |. . . d d d o o o o o o 2259 row 5 |. . . d d d o o o o o o 2260 -------------------------- 2261 .ve 2262 2263 Thus, any entries in the d locations are stored in the d (diagonal) 2264 submatrix, and any entries in the o locations are stored in the 2265 o (off-diagonal) submatrix. Note that the d matrix is stored in 2266 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 2267 2268 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 2269 plus the diagonal part of the d matrix, 2270 and o_nz should indicate the number of block nonzeros per row in the o matrix 2271 2272 In general, for PDE problems in which most nonzeros are near the diagonal, 2273 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2274 or you will get TERRIBLE performance; see the users' manual chapter on 2275 matrices. 2276 2277 Level: intermediate 2278 2279 .seealso: `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `PetscSplitOwnership()` 2280 @*/ 2281 PetscErrorCode MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2282 { 2283 PetscFunctionBegin; 2284 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2285 PetscValidType(B,1); 2286 PetscValidLogicalCollectiveInt(B,bs,2); 2287 PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz)); 2288 PetscFunctionReturn(0); 2289 } 2290 2291 /*@C 2292 MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format 2293 (block compressed row). For good matrix assembly performance 2294 the user should preallocate the matrix storage by setting the parameters 2295 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2296 performance can be increased by more than a factor of 50. 2297 2298 Collective 2299 2300 Input Parameters: 2301 + comm - MPI communicator 2302 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 2303 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 2304 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2305 This value should be the same as the local size used in creating the 2306 y vector for the matrix-vector product y = Ax. 2307 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 2308 This value should be the same as the local size used in creating the 2309 x vector for the matrix-vector product y = Ax. 2310 . 
M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2311 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2312 . d_nz - number of block nonzeros per block row in diagonal portion of local 2313 submatrix (same for all local rows) 2314 . d_nnz - array containing the number of block nonzeros in the various block rows 2315 in the upper triangular portion of the diagonal portion of the local submatrix 2316 (possibly different for each block row) or NULL. 2317 If you plan to factor the matrix you must leave room for the diagonal entry and 2318 set its value even if it is zero. 2319 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2320 submatrix (same for all local rows). 2321 - o_nnz - array containing the number of nonzeros in the various block rows of the 2322 off-diagonal portion of the local submatrix (possibly different for 2323 each block row) or NULL. 2324 2325 Output Parameter: 2326 . A - the matrix 2327 2328 Options Database Keys: 2329 + -mat_no_unroll - uses code that does not unroll the loops in the 2330 block calculations (much slower) 2331 . -mat_block_size - size of the blocks to use 2332 - -mat_mpi - use the parallel matrix data structures even on one processor 2333 (defaults to using SeqBAIJ format on one processor) 2334 2335 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 2336 MatXXXXSetPreallocation() paradigm instead of this routine directly. 2337 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 2338 2339 Notes: 2340 The number of rows and columns must be divisible by blocksize. 2341 This matrix type does not support complex Hermitian operations. 2342 2343 The user MUST specify either the local or global matrix dimensions 2344 (possibly both). 2345 2346 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2347 then it must be used on all processors that share the object for that argument. 2348 2349 If the *_nnz parameter is given then the *_nz parameter is ignored. 2350 2351 Storage Information: 2352 For a square global matrix we define each processor's diagonal portion 2353 to be its local rows and the corresponding columns (a square submatrix); 2354 each processor's off-diagonal portion encompasses the remainder of the 2355 local matrix (a rectangular submatrix). 2356 2357 The user can specify preallocated storage for the diagonal part of 2358 the local submatrix with either d_nz or d_nnz (not both). Set 2359 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 2360 memory allocation. Likewise, specify preallocated storage for the 2361 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2362 2363 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2364 the figure below we depict these three local rows and all columns (0-11). 2365 2366 .vb 2367 0 1 2 3 4 5 6 7 8 9 10 11 2368 -------------------------- 2369 row 3 |. . . d d d o o o o o o 2370 row 4 |. . . d d d o o o o o o 2371 row 5 |. . . d d d o o o o o o 2372 -------------------------- 2373 .ve 2374 2375 Thus, any entries in the d locations are stored in the d (diagonal) 2376 submatrix, and any entries in the o locations are stored in the 2377 o (off-diagonal) submatrix. Note that the d matrix is stored in 2378 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2379 2380 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 2381 plus the diagonal part of the d matrix, 2382 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2383 In general, for PDE problems in which most nonzeros are near the diagonal, 2384 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2385 or you will get TERRIBLE performance; see the users' manual chapter on 2386 matrices. 2387 2388 Level: intermediate 2389 2390 .seealso: `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()` 2391 @*/ 2392 2393 PetscErrorCode MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2394 { 2395 PetscMPIInt size; 2396 2397 PetscFunctionBegin; 2398 PetscCall(MatCreate(comm,A)); 2399 PetscCall(MatSetSizes(*A,m,n,M,N)); 2400 PetscCallMPI(MPI_Comm_size(comm,&size)); 2401 if (size > 1) { 2402 PetscCall(MatSetType(*A,MATMPISBAIJ)); 2403 PetscCall(MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz)); 2404 } else { 2405 PetscCall(MatSetType(*A,MATSEQSBAIJ)); 2406 PetscCall(MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz)); 2407 } 2408 PetscFunctionReturn(0); 2409 } 2410 2411 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2412 { 2413 Mat mat; 2414 Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data; 2415 PetscInt len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs; 2416 PetscScalar *array; 2417 2418 PetscFunctionBegin; 2419 *newmat = NULL; 2420 2421 PetscCall(MatCreate(PetscObjectComm((PetscObject)matin),&mat)); 2422 PetscCall(MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N)); 2423 PetscCall(MatSetType(mat,((PetscObject)matin)->type_name)); 2424 PetscCall(PetscLayoutReference(matin->rmap,&mat->rmap)); 2425 PetscCall(PetscLayoutReference(matin->cmap,&mat->cmap)); 2426 2427 mat->factortype = matin->factortype; 2428 mat->preallocated = PETSC_TRUE; 2429 mat->assembled = PETSC_TRUE; 2430 mat->insertmode = NOT_SET_VALUES; 2431 2432 a = (Mat_MPISBAIJ*)mat->data; 2433 a->bs2 = oldmat->bs2; 2434 a->mbs = oldmat->mbs; 2435 a->nbs = oldmat->nbs; 2436 a->Mbs = oldmat->Mbs; 2437 a->Nbs = oldmat->Nbs; 2438 2439 a->size = oldmat->size; 2440 a->rank = oldmat->rank; 2441 a->donotstash = oldmat->donotstash; 2442 a->roworiented = oldmat->roworiented; 2443 a->rowindices = NULL; 2444 a->rowvalues = NULL; 2445 a->getrowactive = PETSC_FALSE; 2446 a->barray = NULL; 2447 a->rstartbs = oldmat->rstartbs; 2448 a->rendbs = oldmat->rendbs; 2449 a->cstartbs = oldmat->cstartbs; 2450 a->cendbs = oldmat->cendbs; 2451 2452 /* hash table stuff */ 2453 a->ht = NULL; 2454 a->hd = NULL; 2455 a->ht_size = 0; 2456 a->ht_flag = oldmat->ht_flag; 2457 a->ht_fact = oldmat->ht_fact; 2458 a->ht_total_ct = 0; 2459 a->ht_insert_ct = 0; 2460 2461 PetscCall(PetscArraycpy(a->rangebs,oldmat->rangebs,a->size+2)); 2462 if (oldmat->colmap) { 2463 #if defined(PETSC_USE_CTABLE) 2464 PetscCall(PetscTableCreateCopy(oldmat->colmap,&a->colmap)); 2465 #else 2466 PetscCall(PetscMalloc1(a->Nbs,&a->colmap)); 2467 PetscCall(PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt))); 2468 PetscCall(PetscArraycpy(a->colmap,oldmat->colmap,a->Nbs)); 2469 #endif 2470 } else a->colmap = NULL; 2471 2472 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2473 PetscCall(PetscMalloc1(len,&a->garray)); 2474 
PetscCall(PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt))); 2475 PetscCall(PetscArraycpy(a->garray,oldmat->garray,len)); 2476 } else a->garray = NULL; 2477 2478 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash)); 2479 PetscCall(VecDuplicate(oldmat->lvec,&a->lvec)); 2480 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec)); 2481 PetscCall(VecScatterCopy(oldmat->Mvctx,&a->Mvctx)); 2482 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx)); 2483 2484 PetscCall(VecDuplicate(oldmat->slvec0,&a->slvec0)); 2485 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0)); 2486 PetscCall(VecDuplicate(oldmat->slvec1,&a->slvec1)); 2487 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1)); 2488 2489 PetscCall(VecGetLocalSize(a->slvec1,&nt)); 2490 PetscCall(VecGetArray(a->slvec1,&array)); 2491 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a)); 2492 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b)); 2493 PetscCall(VecRestoreArray(a->slvec1,&array)); 2494 PetscCall(VecGetArray(a->slvec0,&array)); 2495 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b)); 2496 PetscCall(VecRestoreArray(a->slvec0,&array)); 2497 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0)); 2498 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1)); 2499 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b)); 2500 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a)); 2501 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b)); 2502 2503 /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */ 2504 PetscCall(PetscObjectReference((PetscObject)oldmat->sMvctx)); 2505 a->sMvctx = oldmat->sMvctx; 2506 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx)); 2507 2508 PetscCall(MatDuplicate(oldmat->A,cpvalues,&a->A)); 2509 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A)); 2510 PetscCall(MatDuplicate(oldmat->B,cpvalues,&a->B)); 2511 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B)); 2512 PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist)); 2513 *newmat = mat; 2514 PetscFunctionReturn(0); 2515 } 2516 2517 /* Used for both MPIBAIJ and MPISBAIJ matrices */ 2518 #define MatLoad_MPISBAIJ_Binary MatLoad_MPIBAIJ_Binary 2519 2520 PetscErrorCode MatLoad_MPISBAIJ(Mat mat,PetscViewer viewer) 2521 { 2522 PetscBool isbinary; 2523 2524 PetscFunctionBegin; 2525 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary)); 2526 PetscCheck(isbinary,PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)mat)->type_name); 2527 PetscCall(MatLoad_MPISBAIJ_Binary(mat,viewer)); 2528 PetscFunctionReturn(0); 2529 } 2530 2531 /*XXXXX@ 2532 MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2533 2534 Input Parameters: 2535 . mat - the matrix 2536 . 
fact - factor 2537 2538 Not Collective on Mat, each process can have a different hash factor 2539 2540 Level: advanced 2541 2542 Notes: 2543 This can also be set by the command line option: -mat_use_hash_table fact 2544 2545 .seealso: `MatSetOption()` 2546 @XXXXX*/ 2547 2548 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[]) 2549 { 2550 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 2551 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(a->B)->data; 2552 PetscReal atmp; 2553 PetscReal *work,*svalues,*rvalues; 2554 PetscInt i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol; 2555 PetscMPIInt rank,size; 2556 PetscInt *rowners_bs,dest,count,source; 2557 PetscScalar *va; 2558 MatScalar *ba; 2559 MPI_Status stat; 2560 2561 PetscFunctionBegin; 2562 PetscCheck(!idx,PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov"); 2563 PetscCall(MatGetRowMaxAbs(a->A,v,NULL)); 2564 PetscCall(VecGetArray(v,&va)); 2565 2566 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size)); 2567 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank)); 2568 2569 bs = A->rmap->bs; 2570 mbs = a->mbs; 2571 Mbs = a->Mbs; 2572 ba = b->a; 2573 bi = b->i; 2574 bj = b->j; 2575 2576 /* find ownerships */ 2577 rowners_bs = A->rmap->range; 2578 2579 /* each proc creates an array to be distributed */ 2580 PetscCall(PetscCalloc1(bs*Mbs,&work)); 2581 2582 /* row_max for B */ 2583 if (rank != size-1) { 2584 for (i=0; i<mbs; i++) { 2585 ncols = bi[1] - bi[0]; bi++; 2586 brow = bs*i; 2587 for (j=0; j<ncols; j++) { 2588 bcol = bs*(*bj); 2589 for (kcol=0; kcol<bs; kcol++) { 2590 col = bcol + kcol; /* local col index */ 2591 col += rowners_bs[rank+1]; /* global col index */ 2592 for (krow=0; krow<bs; krow++) { 2593 atmp = PetscAbsScalar(*ba); ba++; 2594 row = brow + krow; /* local row index */ 2595 if (PetscRealPart(va[row]) < atmp) va[row] = atmp; 2596 if (work[col] < atmp) work[col] = atmp; 2597 } 2598 } 2599 bj++; 2600 } 2601 } 2602 2603 /* send values to its owners */ 2604 for (dest=rank+1; dest<size; dest++) { 2605 svalues = work + rowners_bs[dest]; 2606 count = rowners_bs[dest+1]-rowners_bs[dest]; 2607 PetscCallMPI(MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A))); 2608 } 2609 } 2610 2611 /* receive values */ 2612 if (rank) { 2613 rvalues = work; 2614 count = rowners_bs[rank+1]-rowners_bs[rank]; 2615 for (source=0; source<rank; source++) { 2616 PetscCallMPI(MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat)); 2617 /* process values */ 2618 for (i=0; i<count; i++) { 2619 if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i]; 2620 } 2621 } 2622 } 2623 2624 PetscCall(VecRestoreArray(v,&va)); 2625 PetscCall(PetscFree(work)); 2626 PetscFunctionReturn(0); 2627 } 2628 2629 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2630 { 2631 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2632 PetscInt mbs=mat->mbs,bs=matin->rmap->bs; 2633 PetscScalar *x,*ptr,*from; 2634 Vec bb1; 2635 const PetscScalar *b; 2636 2637 PetscFunctionBegin; 2638 PetscCheck(its > 0 && lits > 0,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive",its,lits); 2639 PetscCheck(bs <= 1,PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2640 2641 if (flag == SOR_APPLY_UPPER) { 2642 PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx)); 2643 
PetscFunctionReturn(0); 2644 } 2645 2646 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2647 if (flag & SOR_ZERO_INITIAL_GUESS) { 2648 PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx)); 2649 its--; 2650 } 2651 2652 PetscCall(VecDuplicate(bb,&bb1)); 2653 while (its--) { 2654 2655 /* lower triangular part: slvec0b = - B^T*xx */ 2656 PetscCall((*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b)); 2657 2658 /* copy xx into slvec0a */ 2659 PetscCall(VecGetArray(mat->slvec0,&ptr)); 2660 PetscCall(VecGetArray(xx,&x)); 2661 PetscCall(PetscArraycpy(ptr,x,bs*mbs)); 2662 PetscCall(VecRestoreArray(mat->slvec0,&ptr)); 2663 2664 PetscCall(VecScale(mat->slvec0,-1.0)); 2665 2666 /* copy bb into slvec1a */ 2667 PetscCall(VecGetArray(mat->slvec1,&ptr)); 2668 PetscCall(VecGetArrayRead(bb,&b)); 2669 PetscCall(PetscArraycpy(ptr,b,bs*mbs)); 2670 PetscCall(VecRestoreArray(mat->slvec1,&ptr)); 2671 2672 /* set slvec1b = 0 */ 2673 PetscCall(VecSet(mat->slvec1b,0.0)); 2674 2675 PetscCall(VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD)); 2676 PetscCall(VecRestoreArray(xx,&x)); 2677 PetscCall(VecRestoreArrayRead(bb,&b)); 2678 PetscCall(VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD)); 2679 2680 /* upper triangular part: bb1 = bb1 - B*x */ 2681 PetscCall((*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1)); 2682 2683 /* local diagonal sweep */ 2684 PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx)); 2685 } 2686 PetscCall(VecDestroy(&bb1)); 2687 } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 2688 PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx)); 2689 } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 2690 PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx)); 2691 } else if (flag & SOR_EISENSTAT) { 2692 Vec xx1; 2693 PetscBool hasop; 2694 const PetscScalar *diag; 2695 PetscScalar *sl,scale = (omega - 2.0)/omega; 2696 PetscInt i,n; 2697 2698 if (!mat->xx1) { 2699 PetscCall(VecDuplicate(bb,&mat->xx1)); 2700 PetscCall(VecDuplicate(bb,&mat->bb1)); 2701 } 2702 xx1 = mat->xx1; 2703 bb1 = mat->bb1; 2704 2705 PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx)); 2706 2707 if (!mat->diag) { 2708 /* this is wrong for same matrix with new nonzero values */ 2709 PetscCall(MatCreateVecs(matin,&mat->diag,NULL)); 2710 PetscCall(MatGetDiagonal(matin,mat->diag)); 2711 } 2712 PetscCall(MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop)); 2713 2714 if (hasop) { 2715 PetscCall(MatMultDiagonalBlock(matin,xx,bb1)); 2716 PetscCall(VecAYPX(mat->slvec1a,scale,bb)); 2717 } else { 2718 /* 2719 These two lines are replaced by code that may be a bit faster for a good compiler 2720 PetscCall(VecPointwiseMult(mat->slvec1a,mat->diag,xx)); 2721 PetscCall(VecAYPX(mat->slvec1a,scale,bb)); 2722 */ 2723 PetscCall(VecGetArray(mat->slvec1a,&sl)); 2724 PetscCall(VecGetArrayRead(mat->diag,&diag)); 2725 PetscCall(VecGetArrayRead(bb,&b)); 2726 PetscCall(VecGetArray(xx,&x)); 2727 PetscCall(VecGetLocalSize(xx,&n)); 2728 if (omega == 1.0) { 2729 for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i]; 2730 PetscCall(PetscLogFlops(2.0*n)); 2731 } else { 2732 for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i]; 2733 PetscCall(PetscLogFlops(3.0*n)); 2734 } 2735 PetscCall(VecRestoreArray(mat->slvec1a,&sl)); 
2736 PetscCall(VecRestoreArrayRead(mat->diag,&diag)); 2737 PetscCall(VecRestoreArrayRead(bb,&b)); 2738 PetscCall(VecRestoreArray(xx,&x)); 2739 } 2740 2741 /* multiply off-diagonal portion of matrix */ 2742 PetscCall(VecSet(mat->slvec1b,0.0)); 2743 PetscCall((*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b)); 2744 PetscCall(VecGetArray(mat->slvec0,&from)); 2745 PetscCall(VecGetArray(xx,&x)); 2746 PetscCall(PetscArraycpy(from,x,bs*mbs)); 2747 PetscCall(VecRestoreArray(mat->slvec0,&from)); 2748 PetscCall(VecRestoreArray(xx,&x)); 2749 PetscCall(VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD)); 2750 PetscCall(VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD)); 2751 PetscCall((*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a)); 2752 2753 /* local sweep */ 2754 PetscCall((*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1)); 2755 PetscCall(VecAXPY(xx,1.0,xx1)); 2756 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2757 PetscFunctionReturn(0); 2758 } 2759 2760 /*@ 2761 MatCreateMPISBAIJWithArrays - creates an MPI SBAIJ matrix using arrays that contain the local rows in standard 2762 CSR format. 2763 2764 Collective 2765 2766 Input Parameters: 2767 + comm - MPI communicator 2768 . bs - the block size, only a block size of 1 is supported 2769 . m - number of local rows (Cannot be PETSC_DECIDE) 2770 . n - This value should be the same as the local size used in creating the 2771 x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it 2772 calculated if N is given). For square matrices n is almost always m. 2773 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2774 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2775 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that block row of the matrix 2776 . j - column indices 2777 - a - matrix values 2778 2779 Output Parameter: 2780 . mat - the matrix 2781 2782 Level: intermediate 2783 2784 Notes: 2785 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 2786 thus you CANNOT change the matrix entries by changing the values of a[] after you have 2787 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 2788 2789 The i and j indices are 0 based, and the i indices are offsets into the local j array.
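   Example (a minimal sketch with bs = 1, written for the process that owns global rows 0 and 1; the sizes and numerical values are illustrative only):
.vb
   PetscInt    i[] = {0, 2, 3};             // row 0 has 2 stored entries, row 1 has 1
   PetscInt    j[] = {0, 1, 1};             // global column indices, upper triangle only
   PetscScalar a[] = {4.0, -1.0, 4.0};
   Mat         A;

   MatCreateMPISBAIJWithArrays(PETSC_COMM_WORLD, 1, 2, 2, PETSC_DETERMINE, PETSC_DETERMINE, i, j, a, &A);
.ve
   Each process passes only the rows it owns; any entries below the diagonal are ignored for this symmetric format.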
2790 2791 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 2792 `MPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()` 2793 @*/ 2794 PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 2795 { 2796 PetscFunctionBegin; 2797 PetscCheck(!i[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 2798 PetscCheck(m >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 2799 PetscCall(MatCreate(comm,mat)); 2800 PetscCall(MatSetSizes(*mat,m,n,M,N)); 2801 PetscCall(MatSetType(*mat,MATMPISBAIJ)); 2802 PetscCall(MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a)); 2803 PetscFunctionReturn(0); 2804 } 2805 2806 /*@C 2807 MatMPISBAIJSetPreallocationCSR - Creates a sparse parallel matrix in SBAIJ format using the given nonzero structure and (optional) numerical values 2808 2809 Collective 2810 2811 Input Parameters: 2812 + B - the matrix 2813 . bs - the block size 2814 . i - the indices into j for the start of each local row (starts with zero) 2815 . j - the column indices for each local row (starts with zero); these must be sorted for each row 2816 - v - optional values in the matrix 2817 2818 Level: advanced 2819 2820 Notes: 2821 Though this routine has Preallocation() in the name, it also sets the exact nonzero locations of the matrix entries 2822 and usually the numerical values as well. 2823 2824 Any entries below the diagonal are ignored. 2825 2826 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MPIAIJ` 2827 @*/ 2828 PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2829 { 2830 PetscFunctionBegin; 2831 PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v)); 2832 PetscFunctionReturn(0); 2833 } 2834 2835 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 2836 { 2837 PetscInt m,N,i,rstart,nnz,Ii,bs,cbs; 2838 PetscInt *indx; 2839 PetscScalar *values; 2840 2841 PetscFunctionBegin; 2842 PetscCall(MatGetSize(inmat,&m,&N)); 2843 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 2844 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inmat->data; 2845 PetscInt *dnz,*onz,mbs,Nbs,nbs; 2846 PetscInt *bindx,rmax=a->rmax,j; 2847 PetscMPIInt rank,size; 2848 2849 PetscCall(MatGetBlockSizes(inmat,&bs,&cbs)); 2850 mbs = m/bs; Nbs = N/cbs; 2851 if (n == PETSC_DECIDE) { 2852 PetscCall(PetscSplitOwnershipBlock(comm,cbs,&n,&N)); 2853 } 2854 nbs = n/cbs; 2855 2856 PetscCall(PetscMalloc1(rmax,&bindx)); 2857 MatPreallocateBegin(comm,mbs,nbs,dnz,onz); /* inline function, output __end and __rstart are used below */ 2858 2859 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 2860 PetscCallMPI(MPI_Comm_size(comm,&size)); 2861 if (rank == size-1) { 2862 /* Check sum(nbs) = Nbs */ 2863 PetscCheck(__end == Nbs,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT,__end,Nbs); 2864 } 2865 2866 rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */ 2867 PetscCall(MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE)); 2868 for (i=0; i<mbs; i++) { 2869
PetscCall(MatGetRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL)); /* non-blocked nnz and indx */ 2870 nnz = nnz/bs; 2871 for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs; 2872 PetscCall(MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz)); 2873 PetscCall(MatRestoreRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL)); 2874 } 2875 PetscCall(MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE)); 2876 PetscCall(PetscFree(bindx)); 2877 2878 PetscCall(MatCreate(comm,outmat)); 2879 PetscCall(MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE)); 2880 PetscCall(MatSetBlockSizes(*outmat,bs,cbs)); 2881 PetscCall(MatSetType(*outmat,MATSBAIJ)); 2882 PetscCall(MatSeqSBAIJSetPreallocation(*outmat,bs,0,dnz)); 2883 PetscCall(MatMPISBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz)); 2884 MatPreallocateEnd(dnz,onz); 2885 } 2886 2887 /* numeric phase */ 2888 PetscCall(MatGetBlockSizes(inmat,&bs,&cbs)); 2889 PetscCall(MatGetOwnershipRange(*outmat,&rstart,NULL)); 2890 2891 PetscCall(MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE)); 2892 for (i=0; i<m; i++) { 2893 PetscCall(MatGetRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values)); 2894 Ii = i + rstart; 2895 PetscCall(MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES)); 2896 PetscCall(MatRestoreRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values)); 2897 } 2898 PetscCall(MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE)); 2899 PetscCall(MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY)); 2900 PetscCall(MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY)); 2901 PetscFunctionReturn(0); 2902 } 2903