#include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/
#include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
#include <../src/mat/impls/sbaij/seq/sbaij.h>
#include <petscblaslapack.h>

#if defined(PETSC_HAVE_ELEMENTAL)
PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
#endif
#if defined(PETSC_HAVE_SCALAPACK)
PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*);
#endif

/* This could be moved to matimpl.h */
static PetscErrorCode MatPreallocateWithMats_Private(Mat B, PetscInt nm, Mat X[], PetscBool symm[], PetscBool fill)
{
  Mat       preallocator;
  PetscInt  r,rstart,rend;
  PetscInt  bs,i,m,n,M,N;
  PetscBool cong = PETSC_TRUE;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidLogicalCollectiveInt(B,nm,2);
  for (i = 0; i < nm; i++) {
    PetscValidHeaderSpecific(X[i],MAT_CLASSID,3);
    PetscCall(PetscLayoutCompare(B->rmap,X[i]->rmap,&cong));
    PetscCheck(cong,PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"Not for different layouts");
  }
  PetscValidLogicalCollectiveBool(B,fill,5);
  PetscCall(MatGetBlockSize(B,&bs));
  PetscCall(MatGetSize(B,&M,&N));
  PetscCall(MatGetLocalSize(B,&m,&n));
  PetscCall(MatCreate(PetscObjectComm((PetscObject)B),&preallocator));
  PetscCall(MatSetType(preallocator,MATPREALLOCATOR));
  PetscCall(MatSetBlockSize(preallocator,bs));
  PetscCall(MatSetSizes(preallocator,m,n,M,N));
  PetscCall(MatSetUp(preallocator));
  PetscCall(MatGetOwnershipRange(preallocator,&rstart,&rend));
  for (r = rstart; r < rend; ++r) {
    PetscInt          ncols;
    const PetscInt    *row;
    const PetscScalar *vals;

    for (i = 0; i < nm; i++) {
      PetscCall(MatGetRow(X[i],r,&ncols,&row,&vals));
      PetscCall(MatSetValues(preallocator,1,&r,ncols,row,vals,INSERT_VALUES));
      if (symm && symm[i]) {
        PetscCall(MatSetValues(preallocator,ncols,row,1,&r,vals,INSERT_VALUES));
      }
      PetscCall(MatRestoreRow(X[i],r,&ncols,&row,&vals));
    }
  }
  PetscCall(MatAssemblyBegin(preallocator,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(preallocator,MAT_FINAL_ASSEMBLY));
  PetscCall(MatPreallocatorPreallocate(preallocator,fill,B));
  PetscCall(MatDestroy(&preallocator));
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Basic(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat      B;
  PetscInt r;

  PetscFunctionBegin;
  if (reuse != MAT_REUSE_MATRIX) {
    PetscBool symm = PETSC_TRUE,isdense;
    PetscInt  bs;

    PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B));
    PetscCall(MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N));
    PetscCall(MatSetType(B,newtype));
    PetscCall(MatGetBlockSize(A,&bs));
    PetscCall(MatSetBlockSize(B,bs));
    PetscCall(PetscLayoutSetUp(B->rmap));
    PetscCall(PetscLayoutSetUp(B->cmap));
    PetscCall(PetscObjectTypeCompareAny((PetscObject)B,&isdense,MATSEQDENSE,MATMPIDENSE,MATSEQDENSECUDA,""));
    if (!isdense) {
      PetscCall(MatGetRowUpperTriangular(A));
      PetscCall(MatPreallocateWithMats_Private(B,1,&A,&symm,PETSC_TRUE));
      PetscCall(MatRestoreRowUpperTriangular(A));
    } else {
      PetscCall(MatSetUp(B));
    }
  } else {
    B = *newmat;
    PetscCall(MatZeroEntries(B));
  }

  PetscCall(MatGetRowUpperTriangular(A));
  for (r = A->rmap->rstart; r < A->rmap->rend; r++) {
    PetscInt          ncols;
    const PetscInt    *row;
    const PetscScalar *vals;

    PetscCall(MatGetRow(A,r,&ncols,&row,&vals));
    PetscCall(MatSetValues(B,1,&r,ncols,row,vals,INSERT_VALUES));
#if defined(PETSC_USE_COMPLEX)
    if (A->hermitian == PETSC_BOOL3_TRUE) {
      PetscInt i;
      for (i = 0; i < ncols; i++) {
        PetscCall(MatSetValue(B,row[i],r,PetscConj(vals[i]),INSERT_VALUES));
      }
    } else {
      PetscCall(MatSetValues(B,ncols,row,1,&r,vals,INSERT_VALUES));
    }
#else
    PetscCall(MatSetValues(B,ncols,row,1,&r,vals,INSERT_VALUES));
#endif
    PetscCall(MatRestoreRow(A,r,&ncols,&row,&vals));
  }
  PetscCall(MatRestoreRowUpperTriangular(A));
  PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));

  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A,&B));
  } else {
    *newmat = B;
  }
  PetscFunctionReturn(0);
}
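/*
   Usage sketch (hedged, not part of this file's API): the basic conversion above
   expands symmetric storage into a full matrix by inserting each stored
   upper-triangular entry at both (r,c) and (c,r), e.g.

     Mat Afull;
     PetscCall(MatConvert(Asbaij,MATMPIAIJ,MAT_INITIAL_MATRIX,&Afull));

   where Asbaij is an assumed, already assembled MATMPISBAIJ matrix; for complex
   Hermitian matrices the mirrored entry is conjugated.
*/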
PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ *aij = (Mat_MPISBAIJ*)mat->data;

  PetscFunctionBegin;
  PetscCall(MatStoreValues(aij->A));
  PetscCall(MatStoreValues(aij->B));
  PetscFunctionReturn(0);
}

PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ *aij = (Mat_MPISBAIJ*)mat->data;

  PetscFunctionBegin;
  PetscCall(MatRetrieveValues(aij->A));
  PetscCall(MatRetrieveValues(aij->B));
  PetscFunctionReturn(0);
}

#define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,orow,ocol) \
  { \
    brow = row/bs; \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else                    *bap  = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    PetscCheck(a->nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp+_i+1,rp+_i,N-_i+1)); \
    PetscCall(PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1))); \
    PetscCall(PetscArrayzero(ap+bs2*_i,bs2)); \
    rp[_i]                      = bcol; \
    ap[bs2*_i + bs*cidx + ridx] = value; \
    A->nonzerostate++; \
a_noinsert:; \
    ailen[brow] = nrow; \
  }

#define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,orow,ocol) \
  { \
    brow = row/bs; \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else                    *bap  = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    PetscCheck(b->nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp+_i+1,rp+_i,N-_i+1)); \
    PetscCall(PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1))); \
    PetscCall(PetscArrayzero(ap+bs2*_i,bs2)); \
    rp[_i]                      = bcol; \
    ap[bs2*_i + bs*cidx + ridx] = value; \
    B->nonzerostate++; \
b_noinsert:; \
    bilen[brow] = nrow; \
  }
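/*
   Block storage note (a worked example restating the macros above, no new logic):
   within block _i the scalar entry for intra-block indices (ridx,cidx) lives at
   ap[bs2*_i + bs*cidx + ridx], i.e. blocks are stored column-major. For bs = 2
   (bs2 = 4), the entry at row%bs = 1, col%bs = 0 of block 3 is ap[4*3 + 2*0 + 1]
   = ap[13]. The while loop is a truncated binary search over the sorted block
   column indices rp[], finished off by the short linear scan.
*/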
into matrix", orow, ocol); \ 206 MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 207 N = nrow++ - 1; \ 208 /* shift up all the later entries in this row */ \ 209 PetscCall(PetscArraymove(rp+_i+1,rp+_i,N-_i+1)); \ 210 PetscCall(PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1))); \ 211 PetscCall(PetscArrayzero(ap+bs2*_i,bs2)); \ 212 rp[_i] = bcol; \ 213 ap[bs2*_i + bs*cidx + ridx] = value; \ 214 B->nonzerostate++;\ 215 b_noinsert:; \ 216 bilen[brow] = nrow; \ 217 } 218 219 /* Only add/insert a(i,j) with i<=j (blocks). 220 Any a(i,j) with i>j input by user is ingored or generates an error 221 */ 222 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 223 { 224 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 225 MatScalar value; 226 PetscBool roworiented = baij->roworiented; 227 PetscInt i,j,row,col; 228 PetscInt rstart_orig=mat->rmap->rstart; 229 PetscInt rend_orig =mat->rmap->rend,cstart_orig=mat->cmap->rstart; 230 PetscInt cend_orig =mat->cmap->rend,bs=mat->rmap->bs; 231 232 /* Some Variables required in the macro */ 233 Mat A = baij->A; 234 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data; 235 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 236 MatScalar *aa =a->a; 237 238 Mat B = baij->B; 239 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 240 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 241 MatScalar *ba =b->a; 242 243 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 244 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 245 MatScalar *ap,*bap; 246 247 /* for stash */ 248 PetscInt n_loc, *in_loc = NULL; 249 MatScalar *v_loc = NULL; 250 251 PetscFunctionBegin; 252 if (!baij->donotstash) { 253 if (n > baij->n_loc) { 254 PetscCall(PetscFree(baij->in_loc)); 255 PetscCall(PetscFree(baij->v_loc)); 256 PetscCall(PetscMalloc1(n,&baij->in_loc)); 257 PetscCall(PetscMalloc1(n,&baij->v_loc)); 258 259 baij->n_loc = n; 260 } 261 in_loc = baij->in_loc; 262 v_loc = baij->v_loc; 263 } 264 265 for (i=0; i<m; i++) { 266 if (im[i] < 0) continue; 267 PetscCheck(im[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1); 268 if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */ 269 row = im[i] - rstart_orig; /* local row index */ 270 for (j=0; j<n; j++) { 271 if (im[i]/bs > in[j]/bs) { 272 if (a->ignore_ltriangular) { 273 continue; /* ignore lower triangular blocks */ 274 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. 
static inline PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
{
  Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
  PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
  PetscInt          *imax = a->imax,*ai = a->i,*ailen = a->ilen;
  PetscInt          *aj = a->j,nonew = a->nonew,bs2 = a->bs2,bs = A->rmap->bs;
  PetscBool         roworiented = a->roworiented;
  const PetscScalar *value = v;
  MatScalar         *ap,*aa = a->a,*bap;

  PetscFunctionBegin;
  if (col < row) {
    if (a->ignore_ltriangular) PetscFunctionReturn(0); /* ignore lower triangular block */
    else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
  }
  rp    = aj + ai[row];
  ap    = aa + bs2*ai[row];
  rmax  = imax[row];
  nrow  = ailen[row];
  value = v;
  low   = 0;
  high  = nrow;

  while (high-low > 7) {
    t = (low+high)/2;
    if (rp[t] > col) high = t;
    else             low  = t;
  }
  for (i=low; i<high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      bap = ap + bs2*i;
      if (roworiented) {
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] += *value++;
            }
          }
        } else {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] = *value++;
            }
          }
        }
      } else {
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++) {
            for (jj=0; jj<bs; jj++) {
              *bap++ += *value++;
            }
          }
        } else {
          for (ii=0; ii<bs; ii++) {
            for (jj=0; jj<bs; jj++) {
              *bap++ = *value++;
            }
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2;
  PetscCheck(nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new block index nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
  N = nrow++ - 1; high++;
  /* shift up all the later entries in this row */
  PetscCall(PetscArraymove(rp+i+1,rp+i,N-i+1));
  PetscCall(PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1)));
  rp[i] = col;
  bap   = ap + bs2*i;
  if (roworiented) {
    for (ii=0; ii<bs; ii++) {
      for (jj=ii; jj<bs2; jj+=bs) {
        bap[jj] = *value++;
      }
    }
  } else {
    for (ii=0; ii<bs; ii++) {
      for (jj=0; jj<bs; jj++) {
        *bap++ = *value++;
      }
    }
  }
noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(0);
}
/*
  This routine is exactly duplicated in mpibaij.c
*/
static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
{
  Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
  PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
  PetscInt          *imax = a->imax,*ai = a->i,*ailen = a->ilen;
  PetscInt          *aj = a->j,nonew = a->nonew,bs2 = a->bs2,bs = A->rmap->bs;
  PetscBool         roworiented = a->roworiented;
  const PetscScalar *value = v;
  MatScalar         *ap,*aa = a->a,*bap;

  PetscFunctionBegin;
  rp    = aj + ai[row];
  ap    = aa + bs2*ai[row];
  rmax  = imax[row];
  nrow  = ailen[row];
  low   = 0;
  high  = nrow;
  value = v;
  while (high-low > 7) {
    t = (low+high)/2;
    if (rp[t] > col) high = t;
    else             low  = t;
  }
  for (i=low; i<high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      bap = ap + bs2*i;
      if (roworiented) {
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] += *value++;
            }
          }
        } else {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] = *value++;
            }
          }
        }
      } else {
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++,value+=bs) {
            for (jj=0; jj<bs; jj++) {
              bap[jj] += value[jj];
            }
            bap += bs;
          }
        } else {
          for (ii=0; ii<bs; ii++,value+=bs) {
            for (jj=0; jj<bs; jj++) {
              bap[jj] = value[jj];
            }
            bap += bs;
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2;
  PetscCheck(nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
  N = nrow++ - 1; high++;
  /* shift up all the later entries in this row */
  PetscCall(PetscArraymove(rp+i+1,rp+i,N-i+1));
  PetscCall(PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1)));
  rp[i] = col;
  bap   = ap + bs2*i;
  if (roworiented) {
    for (ii=0; ii<bs; ii++) {
      for (jj=ii; jj<bs2; jj+=bs) {
        bap[jj] = *value++;
      }
    }
  } else {
    for (ii=0; ii<bs; ii++) {
      for (jj=0; jj<bs; jj++) {
        *bap++ = *value++;
      }
    }
  }
noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(0);
}
/*
  This routine could be optimized by removing the need for the block copy below and passing stride information
  to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ()
*/
PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ    *baij   = (Mat_MPISBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray = baij->barray;
  PetscBool       roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular;
  PetscInt        i,j,ii,jj,row,col,rstart = baij->rstartbs;
  PetscInt        rend = baij->rendbs,cstart = baij->cstartbs,stepval;
  PetscInt        cend = baij->cendbs,bs = mat->rmap->bs,bs2 = baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    PetscCall(PetscMalloc1(bs2,&barray));
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
    PetscCheck(im[i] < baij->Mbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT,im[i],baij->Mbs-1);
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        if (im[i] > in[j]) {
          if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
          else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
        }
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*) v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*) v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++ = *value++;
            }
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {
          col = in[j] - cstart;
          PetscCall(MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]));
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < baij->Nbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT,in[j],baij->Nbs-1);
          if (mat->was_assembled) {
            if (!baij->colmap) {
              PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
            }

#if defined(PETSC_USE_DEBUG)
#if defined(PETSC_USE_CTABLE)
            { PetscInt data;
              PetscCall(PetscTableFind(baij->colmap,in[j]+1,&data));
              PetscCheck((data - 1) % bs == 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            PetscCheck((baij->colmap[in[j]] - 1) % bs == 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscTableFind(baij->colmap,in[j]+1,&col));
            col = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              PetscCall(MatDisAssemble_MPISBAIJ(mat));
              col = in[j];
            }
          } else col = in[j];
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]));
        }
      }
    } else {
      PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i));
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
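/*
   Usage sketch (hedged): MatSetValuesBlocked() takes block indices and one
   bs*bs dense block of values per (row,col) pair; with bs = 2 and the default
   row-oriented ordering

     PetscInt    ib = 0, jb = 1;
     PetscScalar blk[4] = {1.0,2.0,3.0,4.0};
     PetscCall(MatSetValuesBlocked(mat,1,&ib,1,&jb,blk,INSERT_VALUES));

   inserts the block at block position (0,1); blocks with ib > jb fall in the
   lower triangle and are ignored or rejected exactly as in the routine above.
*/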
PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
  PetscInt     bs = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt     bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          col = idxn[j] - bscstart;
          PetscCall(MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j));
        } else {
          if (!baij->colmap) {
            PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
          }
#if defined(PETSC_USE_CTABLE)
          PetscCall(PetscTableFind(baij->colmap,idxn[j]/bs+1,&data));
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col = data + idxn[j]%bs;
            PetscCall(MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j));
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}
PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
  PetscReal    sum[2],*lnorm2;

  PetscFunctionBegin;
  if (baij->size == 1) {
    PetscCall(MatNorm(baij->A,type,norm));
  } else {
    if (type == NORM_FROBENIUS) {
      PetscCall(PetscMalloc1(2,&lnorm2));
      PetscCall(MatNorm(baij->A,type,lnorm2));
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++; /* square of norm(A) */
      PetscCall(MatNorm(baij->B,type,lnorm2));
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--; /* square of norm(B) */
      PetscCall(MPIU_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat)));
      *norm = PetscSqrtReal(sum[0] + 2*sum[1]);
      PetscCall(PetscFree(lnorm2));
    } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
      Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ*)baij->A->data;
      Mat_SeqBAIJ  *bmat = (Mat_SeqBAIJ*)baij->B->data;
      PetscReal    *rsum,*rsum2,vabs;
      PetscInt     *jj,*garray = baij->garray,rstart = baij->rstartbs,nz;
      PetscInt     brow,bcol,col,bs = baij->A->rmap->bs,row,grow,gcol,mbs = amat->mbs;
      MatScalar    *v;

      PetscCall(PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2));
      PetscCall(PetscArrayzero(rsum,mat->cmap->N));
      /* Amat */
      v = amat->a; jj = amat->j;
      for (brow=0; brow<mbs; brow++) {
        grow = bs*(rstart + brow);
        nz   = amat->i[brow+1] - amat->i[brow];
        for (bcol=0; bcol<nz; bcol++) {
          gcol = bs*(rstart + *jj); jj++;
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              vabs = PetscAbsScalar(*v); v++;
              rsum[gcol+col] += vabs;
              /* non-diagonal block */
              if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
            }
          }
        }
        PetscCall(PetscLogFlops(nz*bs*bs));
      }
      /* Bmat */
      v = bmat->a; jj = bmat->j;
      for (brow=0; brow<mbs; brow++) {
        grow = bs*(rstart + brow);
        nz   = bmat->i[brow+1] - bmat->i[brow];
        for (bcol=0; bcol<nz; bcol++) {
          gcol = bs*garray[*jj]; jj++;
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              vabs = PetscAbsScalar(*v); v++;
              rsum[gcol+col] += vabs;
              rsum[grow+row] += vabs;
            }
          }
        }
        PetscCall(PetscLogFlops(nz*bs*bs));
      }
      PetscCall(MPIU_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat)));
      *norm = 0.0;
      for (col=0; col<mat->cmap->N; col++) {
        if (rsum2[col] > *norm) *norm = rsum2[col];
      }
      PetscCall(PetscFree2(rsum,rsum2));
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
  }
  PetscFunctionReturn(0);
}
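/*
   Norm note (restating the Frobenius computation above): with D the stored
   diagonal part (baij->A, whose own MatNorm already accounts for its mirrored
   off-diagonal entries) and O the off-diagonal part (baij->B), the global norm
   is assembled as

     ||M||_F = sqrt( sum_over_ranks( ||D||_F^2 + 2*||O||_F^2 ) )

   the factor 2 supplying the unstored transposed copy of each O entry. The
   1-norm and infinity-norm coincide by symmetry, so one column-sum pass with
   the mirrored contributions added in suffices for both.
*/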
PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
  PetscInt     nstash,reallocs;

  PetscFunctionBegin;
  if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  PetscCall(MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range));
  PetscCall(MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs));
  PetscCall(MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs));
  PetscCall(PetscInfo(mat,"Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs));
  PetscCall(MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs));
  PetscCall(PetscInfo(mat,"Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs));
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
  Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ*)baij->A->data;
  PetscInt     i,j,rstart,ncols,flg,bs2 = baij->bs2;
  PetscInt     *row,*col;
  PetscBool    other_disassembled;
  PetscMPIInt  n;
  PetscBool    r1,r2,r3;
  MatScalar    *val;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!baij->donotstash && !mat->nooffprocentries) {
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg));
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg));
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        PetscCall(MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->bstash));

    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  PetscCall(MatAssemblyBegin(baij->A,mode));
  PetscCall(MatAssemblyEnd(baij->A,mode));

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    PetscCall(MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) {
      PetscCall(MatDisAssemble_MPISBAIJ(mat));
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    PetscCall(MatSetUpMultiply_MPISBAIJ(mat)); /* setup Mvctx and sMvctx */
  }
  PetscCall(MatAssemblyBegin(baij->B,mode));
  PetscCall(MatAssemblyEnd(baij->B,mode));

  PetscCall(PetscFree2(baij->rowvalues,baij->rowindices));

  baij->rowvalues = NULL;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat)));
  }
  PetscFunctionReturn(0);
}
extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
#include <petscdraw.h>
static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
  PetscInt          bs    = mat->rmap->bs;
  PetscMPIInt       rank  = baij->rank;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer,&format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank));
      PetscCall(MatGetInfo(mat,MAT_LOCAL,&info));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(double)info.memory));
      PetscCall(MatGetInfo(baij->A,MAT_LOCAL,&info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used));
      PetscCall(MatGetInfo(baij->B,MAT_LOCAL,&info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(baij->Mvctx,viewer));
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscCall(PetscViewerASCIIPrintf(viewer," block size is %" PetscInt_FMT "\n",bs));
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer,0,&draw));
    PetscCall(PetscDrawIsNull(draw,&isnull));
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    Mat_SeqSBAIJ *Aloc;
    Mat_SeqBAIJ  *Bloc;
    PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar    *a;
    const char   *matname;

    /* Should this be the same type as mat? */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)mat),&A));
    if (rank == 0) {
      PetscCall(MatSetSizes(A,M,N,M,N));
    } else {
      PetscCall(MatSetSizes(A,0,0,M,N));
    }
    PetscCall(MatSetType(A,MATMPISBAIJ));
    PetscCall(MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL));
    PetscCall(MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE));
    PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)A));

    /* copy over the A part */
    Aloc = (Mat_SeqSBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    PetscCall(PetscMalloc1(bs,&rvals));

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          PetscCall(MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    /* copy over the B part */
    Bloc = (Mat_SeqBAIJ*)baij->B->data;
    ai   = Bloc->i; aj = Bloc->j; a = Bloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          PetscCall(MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    PetscCall(PetscFree(rvals));
    PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
    PetscCall(PetscObjectGetName((PetscObject)mat,&matname));
    if (rank == 0) {
      PetscCall(PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,matname));
      PetscCall(MatView_SeqSBAIJ(((Mat_MPISBAIJ*)(A->data))->A,sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
    PetscCall(PetscViewerFlush(viewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(0);
}
/* Used for both MPIBAIJ and MPISBAIJ matrices */
#define MatView_MPISBAIJ_Binary MatView_MPIBAIJ_Binary

PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
{
  PetscBool iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
  if (iascii || isdraw || issocket) {
    PetscCall(MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer));
  } else if (isbinary) PetscCall(MatView_MPISBAIJ_Binary(mat,viewer));
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT,mat->rmap->N,mat->cmap->N);
#endif
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCall(MatStashDestroy_Private(&mat->bstash));
  PetscCall(MatDestroy(&baij->A));
  PetscCall(MatDestroy(&baij->B));
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscTableDestroy(&baij->colmap));
#else
  PetscCall(PetscFree(baij->colmap));
#endif
  PetscCall(PetscFree(baij->garray));
  PetscCall(VecDestroy(&baij->lvec));
  PetscCall(VecScatterDestroy(&baij->Mvctx));
  PetscCall(VecDestroy(&baij->slvec0));
  PetscCall(VecDestroy(&baij->slvec0b));
  PetscCall(VecDestroy(&baij->slvec1));
  PetscCall(VecDestroy(&baij->slvec1a));
  PetscCall(VecDestroy(&baij->slvec1b));
  PetscCall(VecScatterDestroy(&baij->sMvctx));
  PetscCall(PetscFree2(baij->rowvalues,baij->rowindices));
  PetscCall(PetscFree(baij->barray));
  PetscCall(PetscFree(baij->hd));
  PetscCall(VecDestroy(&baij->diag));
  PetscCall(VecDestroy(&baij->bb1));
  PetscCall(VecDestroy(&baij->xx1));
#if defined(PETSC_USE_REAL_MAT_SINGLE)
  PetscCall(PetscFree(baij->setvaluescopy));
#endif
  PetscCall(PetscFree(baij->in_loc));
  PetscCall(PetscFree(baij->v_loc));
  PetscCall(PetscFree(baij->rangebs));
  PetscCall(PetscFree(mat->data));

  PetscCall(PetscObjectChangeTypeName((PetscObject)mat,NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocationCSR_C",NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_elemental_C",NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_scalapack_C",NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpiaij_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpibaij_C",NULL));
  PetscFunctionReturn(0);
}
PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
  PetscInt          mbs = a->mbs,bs = A->rmap->bs;
  PetscScalar       *from;
  const PetscScalar *x;

  PetscFunctionBegin;
  /* diagonal part */
  PetscCall((*a->A->ops->mult)(a->A,xx,a->slvec1a));
  PetscCall(VecSet(a->slvec1b,0.0));

  /* subdiagonal part */
  PetscCheck(a->B->ops->multhermitiantranspose,PetscObjectComm((PetscObject)a->B),PETSC_ERR_SUP,"Not for type %s",((PetscObject)a->B)->type_name);
  PetscCall((*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b));

  /* copy x into the vec slvec0 */
  PetscCall(VecGetArray(a->slvec0,&from));
  PetscCall(VecGetArrayRead(xx,&x));

  PetscCall(PetscArraycpy(from,x,bs*mbs));
  PetscCall(VecRestoreArray(a->slvec0,&from));
  PetscCall(VecRestoreArrayRead(xx,&x));

  PetscCall(VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD));
  PetscCall(VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD));
  /* superdiagonal part */
  PetscCall((*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy));
  PetscFunctionReturn(0);
}
PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
  PetscInt          mbs = a->mbs,bs = A->rmap->bs;
  PetscScalar       *from;
  const PetscScalar *x;

  PetscFunctionBegin;
  /* diagonal part */
  PetscCall((*a->A->ops->mult)(a->A,xx,a->slvec1a));
  PetscCall(VecSet(a->slvec1b,0.0));

  /* subdiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B,xx,a->slvec0b));

  /* copy x into the vec slvec0 */
  PetscCall(VecGetArray(a->slvec0,&from));
  PetscCall(VecGetArrayRead(xx,&x));

  PetscCall(PetscArraycpy(from,x,bs*mbs));
  PetscCall(VecRestoreArray(a->slvec0,&from));
  PetscCall(VecRestoreArrayRead(xx,&x));

  PetscCall(VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD));
  PetscCall(VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD));
  /* superdiagonal part */
  PetscCall((*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy));
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultAdd_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
  PetscInt          mbs = a->mbs,bs = A->rmap->bs;
  PetscScalar       *from,zero = 0.0;
  const PetscScalar *x;

  PetscFunctionBegin;
  /* diagonal part */
  PetscCall((*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a));
  PetscCall(VecSet(a->slvec1b,zero));

  /* subdiagonal part */
  PetscCheck(a->B->ops->multhermitiantranspose,PetscObjectComm((PetscObject)a->B),PETSC_ERR_SUP,"Not for type %s",((PetscObject)a->B)->type_name);
  PetscCall((*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b));

  /* copy x into the vec slvec0 */
  PetscCall(VecGetArray(a->slvec0,&from));
  PetscCall(VecGetArrayRead(xx,&x));
  PetscCall(PetscArraycpy(from,x,bs*mbs));
  PetscCall(VecRestoreArray(a->slvec0,&from));

  PetscCall(VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD));
  PetscCall(VecRestoreArrayRead(xx,&x));
  PetscCall(VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD));

  /* superdiagonal part */
  PetscCall((*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz));
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
  PetscInt          mbs = a->mbs,bs = A->rmap->bs;
  PetscScalar       *from,zero = 0.0;
  const PetscScalar *x;

  PetscFunctionBegin;
  /* diagonal part */
  PetscCall((*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a));
  PetscCall(VecSet(a->slvec1b,zero));

  /* subdiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B,xx,a->slvec0b));

  /* copy x into the vec slvec0 */
  PetscCall(VecGetArray(a->slvec0,&from));
  PetscCall(VecGetArrayRead(xx,&x));
  PetscCall(PetscArraycpy(from,x,bs*mbs));
  PetscCall(VecRestoreArray(a->slvec0,&from));

  PetscCall(VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD));
  PetscCall(VecRestoreArrayRead(xx,&x));
  PetscCall(VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD));

  /* superdiagonal part */
  PetscCall((*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz));
  PetscFunctionReturn(0);
}
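/*
   Mat-vec note (restating the scheme above): only the upper block triangle is
   stored, so y = M*x is assembled from three pieces,

     slvec1a = A_diag * x_local      the stored diagonal block
     slvec0b = B^T * x_local         transpose of the stored off-diagonal part,
                                     conjugated in the _Hermitian variants
     y       = B * x_ghost + slvec1a after the sMvctx scatter

   where the single scatter on the combined slvec0/slvec1 work vectors both
   gathers ghost values of x and accumulates the transpose contributions into
   the owning processes with ADD_VALUES.
*/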
/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;

  PetscFunctionBegin;
  /* PetscCheck(a->rmap->N == a->cmap->N,PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
  PetscCall(MatGetDiagonal(a->A,v));
  PetscFunctionReturn(0);
}

PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;

  PetscFunctionBegin;
  PetscCall(MatScale(a->A,aa));
  PetscCall(MatScale(a->B,aa));
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data;
  PetscScalar  *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscInt     bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt     nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
  PetscInt     *cmap,*idx_p,cstart = mat->rstartbs;

  PetscFunctionBegin;
  PetscCheck(!mat->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
       allocate enough space to hold information from the longest row.
    */
    Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
    Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt     max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
      if (max < tmp) max = tmp;
    }
    PetscCall(PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices));
  }

  PetscCheck(row >= brstart && row < brend,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
  lrow = row - brstart; /* local row index */

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = NULL; pvB = NULL;}
  if (!idx) {pcA = NULL; if (!v) pcB = NULL;}
  PetscCall((*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB));
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
      }
    } else {
      if (idx) *idx = NULL;
      if (v)   *v   = NULL;
    }
  }
  *nz = nztot;
  PetscCall((*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB));
  PetscFunctionReturn(0);
}

PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;

  PetscFunctionBegin;
  PetscCheck(baij->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}
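/*
   Row-assembly note (restating MatGetRow_MPISBAIJ above): the local A part owns
   the columns in [cstart*bs, cend*bs) and the B part holds the remaining
   columns compressed through garray, so a full row is produced by splicing the
   B entries whose global block column precedes cstart in front of the A
   entries, then appending the remaining B entries, keeping the combined column
   list sorted without an explicit sort.
*/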
PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;

  PetscFunctionBegin;
  aA->getrow_utriangular = PETSC_TRUE;
  PetscFunctionReturn(0);
}

PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;

  PetscFunctionBegin;
  aA->getrow_utriangular = PETSC_FALSE;
  PetscFunctionReturn(0);
}

PetscErrorCode MatConjugate_MPISBAIJ(Mat mat)
{
  PetscFunctionBegin;
  if (PetscDefined(USE_COMPLEX)) {
    Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)mat->data;

    PetscCall(MatConjugate(a->A));
    PetscCall(MatConjugate(a->B));
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;

  PetscFunctionBegin;
  PetscCall(MatRealPart(a->A));
  PetscCall(MatRealPart(a->B));
  PetscFunctionReturn(0);
}

PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;

  PetscFunctionBegin;
  PetscCall(MatImaginaryPart(a->A));
  PetscCall(MatImaginaryPart(a->B));
  PetscFunctionReturn(0);
}

/* Check if isrow is a subset of iscol_local, called by MatCreateSubMatrix_MPISBAIJ()
   Input: isrow       - distributed(parallel),
          iscol_local - locally owned (seq)
*/
PetscErrorCode ISEqual_private(IS isrow,IS iscol_local,PetscBool *flg)
{
  PetscInt       sz1,sz2,*a1,*a2,i,j,k,nmatch;
  const PetscInt *ptr1,*ptr2;

  PetscFunctionBegin;
  PetscCall(ISGetLocalSize(isrow,&sz1));
  PetscCall(ISGetLocalSize(iscol_local,&sz2));
  if (sz1 > sz2) {
    *flg = PETSC_FALSE;
    PetscFunctionReturn(0);
  }

  PetscCall(ISGetIndices(isrow,&ptr1));
  PetscCall(ISGetIndices(iscol_local,&ptr2));

  PetscCall(PetscMalloc1(sz1,&a1));
  PetscCall(PetscMalloc1(sz2,&a2));
  PetscCall(PetscArraycpy(a1,ptr1,sz1));
  PetscCall(PetscArraycpy(a2,ptr2,sz2));
  PetscCall(PetscSortInt(sz1,a1));
  PetscCall(PetscSortInt(sz2,a2));

  nmatch = 0;
  k      = 0;
  for (i=0; i<sz1; i++) {
    for (j=k; j<sz2; j++) {
      if (a1[i] == a2[j]) {
        k = j; nmatch++;
        break;
      }
    }
  }
  PetscCall(ISRestoreIndices(isrow,&ptr1));
  PetscCall(ISRestoreIndices(iscol_local,&ptr2));
  PetscCall(PetscFree(a1));
  PetscCall(PetscFree(a2));
  if (nmatch < sz1) {
    *flg = PETSC_FALSE;
  } else {
    *flg = PETSC_TRUE;
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatCreateSubMatrix_MPISBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
{
  IS        iscol_local;
  PetscInt  csize;
  PetscBool isequal;

  PetscFunctionBegin;
  PetscCall(ISGetLocalSize(iscol,&csize));
  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local));
    PetscCheck(iscol_local,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
  } else {
    PetscBool issorted;

    PetscCall(ISAllGather(iscol,&iscol_local));
    PetscCall(ISEqual_private(isrow,iscol_local,&isequal));
    PetscCall(ISSorted(iscol_local,&issorted));
    PetscCheck(isequal && issorted,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isrow and be sorted");
  }

  /* now call MatCreateSubMatrix_MPIBAIJ() */
  PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat));
  if (call == MAT_INITIAL_MATRIX) {
    PetscCall(PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local));
    PetscCall(ISDestroy(&iscol_local));
  }
  PetscFunctionReturn(0);
}
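/*
   Usage sketch (hedged): for the symmetric format only a principal submatrix
   extraction is supported, i.e. iscol must gather to the same sorted index set
   that the isrow sets cover, e.g.

     IS  is;
     Mat sub;
     PetscCall(ISCreateStride(PETSC_COMM_WORLD,nlocal,first,1,&is));
     PetscCall(MatCreateSubMatrix(mat,is,is,MAT_INITIAL_MATRIX,&sub));

   where nlocal and first are assumed to tile the ownership ranges; anything
   else fails the ISEqual_private()/ISSorted() checks above.
*/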
PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data;

  PetscFunctionBegin;
  PetscCall(MatZeroEntries(l->A));
  PetscCall(MatZeroEntries(l->B));
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
  Mat            A = a->A,B = a->B;
  PetscLogDouble isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = (PetscReal)matin->rmap->bs;

  PetscCall(MatGetInfo(A,MAT_LOCAL,info));

  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;

  PetscCall(MatGetInfo(B,MAT_LOCAL,info));

  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_SUBMAT_SINGLEIS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    MatCheckPreallocated(A,1);
    PetscCall(MatSetOption(a->A,op,flg));
    PetscCall(MatSetOption(a->B,op,flg));
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A,1);
    a->roworiented = flg;

    PetscCall(MatSetOption(a->A,op,flg));
    PetscCall(MatSetOption(a->B,op,flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_SORTED_FULL:
    PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op]));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    a->ht_flag = flg;
    break;
  case MAT_HERMITIAN:
    MatCheckPreallocated(A,1);
    PetscCall(MatSetOption(a->A,op,flg));
#if defined(PETSC_USE_COMPLEX)
    if (flg) { /* need different mat-vec ops */
      A->ops->mult             = MatMult_MPISBAIJ_Hermitian;
      A->ops->multadd          = MatMultAdd_MPISBAIJ_Hermitian;
      A->ops->multtranspose    = NULL;
      A->ops->multtransposeadd = NULL;
      A->symmetric             = PETSC_BOOL3_FALSE;
    }
#endif
    break;
  case MAT_SPD:
  case MAT_SYMMETRIC:
    MatCheckPreallocated(A,1);
    PetscCall(MatSetOption(a->A,op,flg));
#if defined(PETSC_USE_COMPLEX)
    if (flg) { /* restore to use default mat-vec ops */
      A->ops->mult             = MatMult_MPISBAIJ;
      A->ops->multadd          = MatMultAdd_MPISBAIJ;
      A->ops->multtranspose    = MatMult_MPISBAIJ;
      A->ops->multtransposeadd = MatMultAdd_MPISBAIJ;
    }
#endif
    break;
  case MAT_STRUCTURALLY_SYMMETRIC:
    MatCheckPreallocated(A,1);
    PetscCall(MatSetOption(a->A,op,flg));
    break;
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
    PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
    PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op]));
    break;
  case MAT_SPD_ETERNAL:
    break;
  case MAT_IGNORE_LOWER_TRIANGULAR:
    aA->ignore_ltriangular = flg;
    break;
  case MAT_ERROR_LOWER_TRIANGULAR:
    aA->ignore_ltriangular = flg;
    break;
  case MAT_GETROW_UPPERTRIANGULAR:
    aA->getrow_utriangular = flg;
    break;
  default:
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}
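/*
   Option note (restating the MAT_HERMITIAN handling above): with complex
   scalars a Hermitian matrix is not symmetric, so a hedged setup sketch is

     PetscCall(MatSetOption(mat,MAT_HERMITIAN,PETSC_TRUE));

   issued after preallocation, which swaps in the _Hermitian mat-vec kernels
   and disables MatMultTranspose(); MAT_SYMMETRIC/MAT_SPD restore the default
   kernels, for which mult and multtranspose coincide.
*/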
PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
{
  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A,*B));
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscCall(MatDuplicate(A,MAT_COPY_VALUES,B));
  } else if (reuse == MAT_REUSE_MATRIX) {
    PetscCall(MatCopy(A,*B,SAME_NONZERO_PATTERN));
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
  Mat          a = baij->A, b = baij->B;
  PetscInt     nv,m,n;
  PetscBool    flg;

  PetscFunctionBegin;
  if (ll != rr) {
    PetscCall(VecEqual(ll,rr,&flg));
    PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be the same");
  }
  if (!ll) PetscFunctionReturn(0);

  PetscCall(MatGetLocalSize(mat,&m,&n));
  PetscCheck(m == n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local sizes %" PetscInt_FMT " %" PetscInt_FMT " must be the same",m,n);

  PetscCall(VecGetLocalSize(rr,&nv));
  PetscCheck(nv == n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");

  PetscCall(VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD));

  /* left diagonalscale the off-diagonal part */
  PetscCall((*b->ops->diagonalscale)(b,ll,NULL));

  /* scale the diagonal part */
  PetscCall((*a->ops->diagonalscale)(a,ll,rr));

  /* right diagonalscale the off-diagonal part */
  PetscCall(VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD));
  PetscCall((*b->ops->diagonalscale)(b,NULL,baij->lvec));
  PetscFunctionReturn(0);
}
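/*
   Scaling note (restating MatDiagonalScale_MPISBAIJ above): only the symmetric
   scaling D*M*D is representable in sbaij format, hence ll and rr must be the
   same vector. A hedged sketch:

     Vec d;
     PetscCall(MatCreateVecs(mat,&d,NULL));
     PetscCall(VecSet(d,2.0));
     PetscCall(MatDiagonalScale(mat,d,d));

   which scales every stored entry a_ij to d_i * a_ij * d_j.
*/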
PetscCall(VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1596 PetscCall((*b->ops->diagonalscale)(b,NULL,baij->lvec)); 1597 PetscFunctionReturn(0); 1598 } 1599 1600 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A) 1601 { 1602 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1603 1604 PetscFunctionBegin; 1605 PetscCall(MatSetUnfactored(a->A)); 1606 PetscFunctionReturn(0); 1607 } 1608 1609 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*); 1610 1611 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool *flag) 1612 { 1613 Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data; 1614 Mat a,b,c,d; 1615 PetscBool flg; 1616 1617 PetscFunctionBegin; 1618 a = matA->A; b = matA->B; 1619 c = matB->A; d = matB->B; 1620 1621 PetscCall(MatEqual(a,c,&flg)); 1622 if (flg) { 1623 PetscCall(MatEqual(b,d,&flg)); 1624 } 1625 PetscCall(MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A))); 1626 PetscFunctionReturn(0); 1627 } 1628 1629 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str) 1630 { 1631 PetscBool isbaij; 1632 1633 PetscFunctionBegin; 1634 PetscCall(PetscObjectTypeCompareAny((PetscObject)B,&isbaij,MATSEQSBAIJ,MATMPISBAIJ,"")); 1635 PetscCheck(isbaij,PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"Not for matrix type %s",((PetscObject)B)->type_name); 1636 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1637 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1638 PetscCall(MatGetRowUpperTriangular(A)); 1639 PetscCall(MatCopy_Basic(A,B,str)); 1640 PetscCall(MatRestoreRowUpperTriangular(A)); 1641 } else { 1642 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1643 Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)B->data; 1644 1645 PetscCall(MatCopy(a->A,b->A,str)); 1646 PetscCall(MatCopy(a->B,b->B,str)); 1647 } 1648 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 1649 PetscFunctionReturn(0); 1650 } 1651 1652 PetscErrorCode MatSetUp_MPISBAIJ(Mat A) 1653 { 1654 PetscFunctionBegin; 1655 PetscCall(MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL)); 1656 PetscFunctionReturn(0); 1657 } 1658 1659 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1660 { 1661 Mat_MPISBAIJ *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data; 1662 PetscBLASInt bnz,one=1; 1663 Mat_SeqSBAIJ *xa,*ya; 1664 Mat_SeqBAIJ *xb,*yb; 1665 1666 PetscFunctionBegin; 1667 if (str == SAME_NONZERO_PATTERN) { 1668 PetscScalar alpha = a; 1669 xa = (Mat_SeqSBAIJ*)xx->A->data; 1670 ya = (Mat_SeqSBAIJ*)yy->A->data; 1671 PetscCall(PetscBLASIntCast(xa->nz,&bnz)); 1672 PetscCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one)); 1673 xb = (Mat_SeqBAIJ*)xx->B->data; 1674 yb = (Mat_SeqBAIJ*)yy->B->data; 1675 PetscCall(PetscBLASIntCast(xb->nz,&bnz)); 1676 PetscCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one)); 1677 PetscCall(PetscObjectStateIncrease((PetscObject)Y)); 1678 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1679 PetscCall(MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE)); 1680 PetscCall(MatAXPY_Basic(Y,a,X,str)); 1681 PetscCall(MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE)); 1682 } else { 1683 Mat B; 1684 PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs; 1685 PetscCheck(bs == X->rmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size"); 1686 PetscCall(MatGetRowUpperTriangular(X)); 1687 PetscCall(MatGetRowUpperTriangular(Y)); 1688 
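/* different nonzero patterns: compute merged preallocation for the (upper triangular) diagonal block and the off-diagonal block, form a*X + Y in a new, correctly preallocated matrix B, then replace Y by B via MatHeaderMerge() */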
PetscCall(PetscMalloc1(yy->A->rmap->N,&nnz_d));
1689 PetscCall(PetscMalloc1(yy->B->rmap->N,&nnz_o));
1690 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y),&B));
1691 PetscCall(PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name));
1692 PetscCall(MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N));
1693 PetscCall(MatSetBlockSizesFromMats(B,Y,Y));
1694 PetscCall(MatSetType(B,MATMPISBAIJ));
1695 PetscCall(MatAXPYGetPreallocation_SeqSBAIJ(yy->A,xx->A,nnz_d));
1696 PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o));
1697 PetscCall(MatMPISBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o));
1698 PetscCall(MatAXPY_BasicWithPreallocation(B,Y,a,X,str));
1699 PetscCall(MatHeaderMerge(Y,&B));
1700 PetscCall(PetscFree(nnz_d));
1701 PetscCall(PetscFree(nnz_o));
1702 PetscCall(MatRestoreRowUpperTriangular(X));
1703 PetscCall(MatRestoreRowUpperTriangular(Y));
1704 }
1705 PetscFunctionReturn(0);
1706 }
1707
1708 PetscErrorCode MatCreateSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1709 {
1710 PetscInt i;
1711 PetscBool flg;
1712
1713 PetscFunctionBegin;
1714 PetscCall(MatCreateSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B)); /* (*B)[] are sbaij matrices */
1715 for (i=0; i<n; i++) {
1716 PetscCall(ISEqual(irow[i],icol[i],&flg));
1717 if (!flg) {
1718 PetscCall(MatSeqSBAIJZeroOps_Private((*B)[i]));
1719 }
1720 }
1721 PetscFunctionReturn(0);
1722 }
1723
1724 PetscErrorCode MatShift_MPISBAIJ(Mat Y,PetscScalar a)
1725 {
1726 Mat_MPISBAIJ *maij = (Mat_MPISBAIJ*)Y->data;
1727 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ*)maij->A->data;
1728
1729 PetscFunctionBegin;
1730 if (!Y->preallocated) {
1731 PetscCall(MatMPISBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL));
1732 } else if (!aij->nz) {
1733 PetscInt nonew = aij->nonew;
1734 PetscCall(MatSeqSBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL));
1735 aij->nonew = nonew;
1736 }
1737 PetscCall(MatShift_Basic(Y,a));
1738 PetscFunctionReturn(0);
1739 }
1740
1741 PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A,PetscBool *missing,PetscInt *d)
1742 {
1743 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1744
1745 PetscFunctionBegin;
1746 PetscCheck(A->rmap->n == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
1747 PetscCall(MatMissingDiagonal(a->A,missing,d));
1748 if (d) {
1749 PetscInt rstart;
1750 PetscCall(MatGetOwnershipRange(A,&rstart,NULL));
1751 *d += rstart/A->rmap->bs;
1752
1753 }
1754 PetscFunctionReturn(0);
1755 }
1756
1757 PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a)
1758 {
1759 PetscFunctionBegin;
1760 *a = ((Mat_MPISBAIJ*)A->data)->A;
1761 PetscFunctionReturn(0);
1762 }
1763
1764 /* -------------------------------------------------------------------*/
1765 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ,
1766 MatGetRow_MPISBAIJ,
1767 MatRestoreRow_MPISBAIJ,
1768 MatMult_MPISBAIJ,
1769 /* 4*/ MatMultAdd_MPISBAIJ,
1770 MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */
1771 MatMultAdd_MPISBAIJ,
1772 NULL,
1773 NULL,
1774 NULL,
1775 /* 10*/ NULL,
1776 NULL,
1777 NULL,
1778 MatSOR_MPISBAIJ,
1779 MatTranspose_MPISBAIJ,
1780 /* 15*/ MatGetInfo_MPISBAIJ,
1781 MatEqual_MPISBAIJ,
1782 MatGetDiagonal_MPISBAIJ,
1783 MatDiagonalScale_MPISBAIJ,
1784 MatNorm_MPISBAIJ,
1785 /* 20*/ MatAssemblyBegin_MPISBAIJ,
1786 MatAssemblyEnd_MPISBAIJ,
1787 MatSetOption_MPISBAIJ,
1788 MatZeroEntries_MPISBAIJ,
1789 /* 24*/ NULL,
1790 NULL,
1791 NULL,
1792 NULL,
1793 NULL,
1794 /* 29*/ MatSetUp_MPISBAIJ,
1795 NULL,
1796 NULL,
1797 MatGetDiagonalBlock_MPISBAIJ,
1798 NULL,
1799 /* 34*/ MatDuplicate_MPISBAIJ,
1800 NULL,
1801 NULL,
1802 NULL,
1803 NULL,
1804 /* 39*/ MatAXPY_MPISBAIJ,
1805 MatCreateSubMatrices_MPISBAIJ,
1806 MatIncreaseOverlap_MPISBAIJ,
1807 MatGetValues_MPISBAIJ,
1808 MatCopy_MPISBAIJ,
1809 /* 44*/ NULL,
1810 MatScale_MPISBAIJ,
1811 MatShift_MPISBAIJ,
1812 NULL,
1813 NULL,
1814 /* 49*/ NULL,
1815 NULL,
1816 NULL,
1817 NULL,
1818 NULL,
1819 /* 54*/ NULL,
1820 NULL,
1821 MatSetUnfactored_MPISBAIJ,
1822 NULL,
1823 MatSetValuesBlocked_MPISBAIJ,
1824 /* 59*/ MatCreateSubMatrix_MPISBAIJ,
1825 NULL,
1826 NULL,
1827 NULL,
1828 NULL,
1829 /* 64*/ NULL,
1830 NULL,
1831 NULL,
1832 NULL,
1833 NULL,
1834 /* 69*/ MatGetRowMaxAbs_MPISBAIJ,
1835 NULL,
1836 MatConvert_MPISBAIJ_Basic,
1837 NULL,
1838 NULL,
1839 /* 74*/ NULL,
1840 NULL,
1841 NULL,
1842 NULL,
1843 NULL,
1844 /* 79*/ NULL,
1845 NULL,
1846 NULL,
1847 NULL,
1848 MatLoad_MPISBAIJ,
1849 /* 84*/ NULL,
1850 NULL,
1851 NULL,
1852 NULL,
1853 NULL,
1854 /* 89*/ NULL,
1855 NULL,
1856 NULL,
1857 NULL,
1858 NULL,
1859 /* 94*/ NULL,
1860 NULL,
1861 NULL,
1862 NULL,
1863 NULL,
1864 /* 99*/ NULL,
1865 NULL,
1866 NULL,
1867 MatConjugate_MPISBAIJ,
1868 NULL,
1869 /*104*/ NULL,
1870 MatRealPart_MPISBAIJ,
1871 MatImaginaryPart_MPISBAIJ,
1872 MatGetRowUpperTriangular_MPISBAIJ,
1873 MatRestoreRowUpperTriangular_MPISBAIJ,
1874 /*109*/ NULL,
1875 NULL,
1876 NULL,
1877 NULL,
1878 MatMissingDiagonal_MPISBAIJ,
1879 /*114*/ NULL,
1880 NULL,
1881 NULL,
1882 NULL,
1883 NULL,
1884 /*119*/ NULL,
1885 NULL,
1886 NULL,
1887 NULL,
1888 NULL,
1889 /*124*/ NULL,
1890 NULL,
1891 NULL,
1892 NULL,
1893 NULL,
1894 /*129*/ NULL,
1895 NULL,
1896 NULL,
1897 NULL,
1898 NULL,
1899 /*134*/ NULL,
1900 NULL,
1901 NULL,
1902 NULL,
1903 NULL,
1904 /*139*/ MatSetBlockSizes_Default,
1905 NULL,
1906 NULL,
1907 NULL,
1908 NULL,
1909 /*144*/ MatCreateMPIMatConcatenateSeqMat_MPISBAIJ,
1910 NULL,
1911 NULL,
1912 NULL,
1913 NULL,
1914 NULL,
1915 /*150*/ NULL
1916 };
1917
1918 PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
1919 {
1920 Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)B->data;
1921 PetscInt i,mbs,Mbs;
1922 PetscMPIInt size;
1923
1924 PetscFunctionBegin;
1925 PetscCall(MatSetBlockSize(B,PetscAbs(bs)));
1926 PetscCall(PetscLayoutSetUp(B->rmap));
1927 PetscCall(PetscLayoutSetUp(B->cmap));
1928 PetscCall(PetscLayoutGetBlockSize(B->rmap,&bs));
1929 PetscCheck(B->rmap->N <= B->cmap->N,PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"MPISBAIJ matrix cannot have more rows %" PetscInt_FMT " than columns %" PetscInt_FMT,B->rmap->N,B->cmap->N);
1930 PetscCheck(B->rmap->n <= B->cmap->n,PETSC_COMM_SELF,PETSC_ERR_SUP,"MPISBAIJ matrix cannot have more local rows %" PetscInt_FMT " than columns %" PetscInt_FMT,B->rmap->n,B->cmap->n);
1931
1932 mbs = B->rmap->n/bs;
1933 Mbs = B->rmap->N/bs;
1934 PetscCheck(mbs*bs == B->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT,B->rmap->n,bs);
1935
1936 B->rmap->bs = bs;
1937 b->bs2 = bs*bs;
1938 b->mbs = mbs;
1939 b->Mbs = Mbs;
1940 b->nbs = B->cmap->n/bs;
1941 b->Nbs = B->cmap->N/bs;
1942
1943 for (i=0; i<=b->size; i++) {
1944 b->rangebs[i] = B->rmap->range[i]/bs;
1945 }
1946 b->rstartbs = B->rmap->rstart/bs;
1947 b->rendbs = B->rmap->rend/bs;
1948
1949 b->cstartbs = B->cmap->rstart/bs;
1950 b->cendbs = B->cmap->rend/bs;
1951
1952 #if defined(PETSC_USE_CTABLE)
1953 PetscCall(PetscTableDestroy(&b->colmap));
1954 #else
1955
PetscCall(PetscFree(b->colmap)); 1956 #endif 1957 PetscCall(PetscFree(b->garray)); 1958 PetscCall(VecDestroy(&b->lvec)); 1959 PetscCall(VecScatterDestroy(&b->Mvctx)); 1960 PetscCall(VecDestroy(&b->slvec0)); 1961 PetscCall(VecDestroy(&b->slvec0b)); 1962 PetscCall(VecDestroy(&b->slvec1)); 1963 PetscCall(VecDestroy(&b->slvec1a)); 1964 PetscCall(VecDestroy(&b->slvec1b)); 1965 PetscCall(VecScatterDestroy(&b->sMvctx)); 1966 1967 /* Because the B will have been resized we simply destroy it and create a new one each time */ 1968 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&size)); 1969 PetscCall(MatDestroy(&b->B)); 1970 PetscCall(MatCreate(PETSC_COMM_SELF,&b->B)); 1971 PetscCall(MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0)); 1972 PetscCall(MatSetType(b->B,MATSEQBAIJ)); 1973 PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)b->B)); 1974 1975 if (!B->preallocated) { 1976 PetscCall(MatCreate(PETSC_COMM_SELF,&b->A)); 1977 PetscCall(MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n)); 1978 PetscCall(MatSetType(b->A,MATSEQSBAIJ)); 1979 PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)b->A)); 1980 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash)); 1981 } 1982 1983 PetscCall(MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz)); 1984 PetscCall(MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz)); 1985 1986 B->preallocated = PETSC_TRUE; 1987 B->was_assembled = PETSC_FALSE; 1988 B->assembled = PETSC_FALSE; 1989 PetscFunctionReturn(0); 1990 } 1991 1992 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 1993 { 1994 PetscInt m,rstart,cend; 1995 PetscInt i,j,d,nz,bd, nz_max=0,*d_nnz=NULL,*o_nnz=NULL; 1996 const PetscInt *JJ =NULL; 1997 PetscScalar *values=NULL; 1998 PetscBool roworiented = ((Mat_MPISBAIJ*)B->data)->roworiented; 1999 PetscBool nooffprocentries; 2000 2001 PetscFunctionBegin; 2002 PetscCheck(bs >= 1,PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %" PetscInt_FMT,bs); 2003 PetscCall(PetscLayoutSetBlockSize(B->rmap,bs)); 2004 PetscCall(PetscLayoutSetBlockSize(B->cmap,bs)); 2005 PetscCall(PetscLayoutSetUp(B->rmap)); 2006 PetscCall(PetscLayoutSetUp(B->cmap)); 2007 PetscCall(PetscLayoutGetBlockSize(B->rmap,&bs)); 2008 m = B->rmap->n/bs; 2009 rstart = B->rmap->rstart/bs; 2010 cend = B->cmap->rend/bs; 2011 2012 PetscCheck(!ii[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %" PetscInt_FMT,ii[0]); 2013 PetscCall(PetscMalloc2(m,&d_nnz,m,&o_nnz)); 2014 for (i=0; i<m; i++) { 2015 nz = ii[i+1] - ii[i]; 2016 PetscCheck(nz >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT,i,nz); 2017 /* count the ones on the diagonal and above, split into diagonal and off diagonal portions. 
*/
2018 JJ = jj + ii[i];
2019 bd = 0;
2020 for (j=0; j<nz; j++) {
2021 if (*JJ >= i + rstart) break;
2022 JJ++;
2023 bd++;
2024 }
2025 d = 0;
2026 for (; j<nz; j++) {
2027 if (*JJ++ >= cend) break;
2028 d++;
2029 }
2030 d_nnz[i] = d;
2031 o_nnz[i] = nz - d - bd;
2032 nz = nz - bd;
2033 nz_max = PetscMax(nz_max,nz);
2034 }
2035 PetscCall(MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz));
2036 PetscCall(MatSetOption(B,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE));
2037 PetscCall(PetscFree2(d_nnz,o_nnz));
2038
2039 values = (PetscScalar*)V;
2040 if (!values) {
2041 PetscCall(PetscCalloc1(bs*bs*nz_max,&values));
2042 }
2043 for (i=0; i<m; i++) {
2044 PetscInt row = i + rstart;
2045 PetscInt ncols = ii[i+1] - ii[i];
2046 const PetscInt *icols = jj + ii[i];
2047 if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
2048 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2049 PetscCall(MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES));
2050 } else { /* block ordering does not match so we can only insert one block at a time. */
2051 PetscInt j;
2052 for (j=0; j<ncols; j++) {
2053 const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
2054 PetscCall(MatSetValuesBlocked_MPISBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES));
2055 }
2056 }
2057 }
2058
2059 if (!V) PetscCall(PetscFree(values));
2060 nooffprocentries = B->nooffprocentries;
2061 B->nooffprocentries = PETSC_TRUE;
2062 PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
2063 PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
2064 B->nooffprocentries = nooffprocentries;
2065
2066 PetscCall(MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
2067 PetscFunctionReturn(0);
2068 }
2069
2070 /*MC
2071 MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
2072 based on block compressed sparse row format. Only the upper triangular portion of the "diagonal" portion of
2073 the matrix is stored.
2074
2075 For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you
2076 can call MatSetOption(Mat, MAT_HERMITIAN, PETSC_TRUE);
2077
2078 Options Database Keys:
2079 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
2080
2081 Notes:
2082 The number of rows in the matrix must be less than or equal to the number of columns. Similarly the number of rows in the
2083 diagonal portion of the matrix of each process has to be less than or equal to the number of columns.
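   Example:
   A minimal assembly sketch (illustrative only; the sizes N, bs, d_nz, o_nz and the block entry (i,j,v)
   are assumed to be provided by the caller, and error checking is omitted):
.vb
   Mat A;
   MatCreate(PETSC_COMM_WORLD,&A);
   MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,N,N);
   MatSetType(A,MATMPISBAIJ);
   MatMPISBAIJSetPreallocation(A,bs,d_nz,NULL,o_nz,NULL);
   MatSetValuesBlocked(A,1,&i,1,&j,v,INSERT_VALUES); /* set only block entries with i <= j */
   MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
   MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
.ve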
2084 2085 Level: beginner 2086 2087 .seealso: `MatCreateBAIJ()`, `MATSEQSBAIJ`, `MatType` 2088 M*/ 2089 2090 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B) 2091 { 2092 Mat_MPISBAIJ *b; 2093 PetscBool flg = PETSC_FALSE; 2094 2095 PetscFunctionBegin; 2096 PetscCall(PetscNewLog(B,&b)); 2097 B->data = (void*)b; 2098 PetscCall(PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps))); 2099 2100 B->ops->destroy = MatDestroy_MPISBAIJ; 2101 B->ops->view = MatView_MPISBAIJ; 2102 B->assembled = PETSC_FALSE; 2103 B->insertmode = NOT_SET_VALUES; 2104 2105 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank)); 2106 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size)); 2107 2108 /* build local table of row and column ownerships */ 2109 PetscCall(PetscMalloc1(b->size+2,&b->rangebs)); 2110 2111 /* build cache for off array entries formed */ 2112 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash)); 2113 2114 b->donotstash = PETSC_FALSE; 2115 b->colmap = NULL; 2116 b->garray = NULL; 2117 b->roworiented = PETSC_TRUE; 2118 2119 /* stuff used in block assembly */ 2120 b->barray = NULL; 2121 2122 /* stuff used for matrix vector multiply */ 2123 b->lvec = NULL; 2124 b->Mvctx = NULL; 2125 b->slvec0 = NULL; 2126 b->slvec0b = NULL; 2127 b->slvec1 = NULL; 2128 b->slvec1a = NULL; 2129 b->slvec1b = NULL; 2130 b->sMvctx = NULL; 2131 2132 /* stuff for MatGetRow() */ 2133 b->rowindices = NULL; 2134 b->rowvalues = NULL; 2135 b->getrowactive = PETSC_FALSE; 2136 2137 /* hash table stuff */ 2138 b->ht = NULL; 2139 b->hd = NULL; 2140 b->ht_size = 0; 2141 b->ht_flag = PETSC_FALSE; 2142 b->ht_fact = 0; 2143 b->ht_total_ct = 0; 2144 b->ht_insert_ct = 0; 2145 2146 /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */ 2147 b->ijonly = PETSC_FALSE; 2148 2149 b->in_loc = NULL; 2150 b->v_loc = NULL; 2151 b->n_loc = 0; 2152 2153 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ)); 2154 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ)); 2155 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ)); 2156 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ)); 2157 #if defined(PETSC_HAVE_ELEMENTAL) 2158 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_elemental_C",MatConvert_MPISBAIJ_Elemental)); 2159 #endif 2160 #if defined(PETSC_HAVE_SCALAPACK) 2161 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_scalapack_C",MatConvert_SBAIJ_ScaLAPACK)); 2162 #endif 2163 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpiaij_C",MatConvert_MPISBAIJ_Basic)); 2164 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpibaij_C",MatConvert_MPISBAIJ_Basic)); 2165 2166 B->symmetric = PETSC_BOOL3_TRUE; 2167 B->structurally_symmetric = PETSC_BOOL3_TRUE; 2168 B->symmetry_eternal = PETSC_TRUE; 2169 B->structural_symmetry_eternal = PETSC_TRUE ; 2170 #if defined(PETSC_USE_COMPLEX) 2171 B->hermitian = PETSC_BOOL3_FALSE; 2172 #else 2173 B->hermitian = PETSC_BOOL3_TRUE; 2174 #endif 2175 2176 PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ)); 2177 PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix 1","Mat"); 2178 PetscCall(PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing 
matrix","MatSetOption",flg,&flg,NULL)); 2179 if (flg) { 2180 PetscReal fact = 1.39; 2181 PetscCall(MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE)); 2182 PetscCall(PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL)); 2183 if (fact <= 1.0) fact = 1.39; 2184 PetscCall(MatMPIBAIJSetHashTableFactor(B,fact)); 2185 PetscCall(PetscInfo(B,"Hash table Factor used %5.2g\n",(double)fact)); 2186 } 2187 PetscOptionsEnd(); 2188 PetscFunctionReturn(0); 2189 } 2190 2191 /*MC 2192 MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices. 2193 2194 This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator, 2195 and MATMPISBAIJ otherwise. 2196 2197 Options Database Keys: 2198 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions() 2199 2200 Level: beginner 2201 2202 .seealso: `MatCreateSBAIJ`, `MATSEQSBAIJ`, `MATMPISBAIJ` 2203 M*/ 2204 2205 /*@C 2206 MatMPISBAIJSetPreallocation - For good matrix assembly performance 2207 the user should preallocate the matrix storage by setting the parameters 2208 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2209 performance can be increased by more than a factor of 50. 2210 2211 Collective on Mat 2212 2213 Input Parameters: 2214 + B - the matrix 2215 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 2216 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 2217 . d_nz - number of block nonzeros per block row in diagonal portion of local 2218 submatrix (same for all local rows) 2219 . d_nnz - array containing the number of block nonzeros in the various block rows 2220 in the upper triangular and diagonal part of the in diagonal portion of the local 2221 (possibly different for each block row) or NULL. If you plan to factor the matrix you must leave room 2222 for the diagonal entry and set a value even if it is zero. 2223 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2224 submatrix (same for all local rows). 2225 - o_nnz - array containing the number of nonzeros in the various block rows of the 2226 off-diagonal portion of the local submatrix that is right of the diagonal 2227 (possibly different for each block row) or NULL. 2228 2229 Options Database Keys: 2230 + -mat_no_unroll - uses code that does not unroll the loops in the 2231 block calculations (much slower) 2232 - -mat_block_size - size of the blocks to use 2233 2234 Notes: 2235 2236 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2237 than it must be used on all processors that share the object for that argument. 2238 2239 If the *_nnz parameter is given then the *_nz parameter is ignored 2240 2241 Storage Information: 2242 For a square global matrix we define each processor's diagonal portion 2243 to be its local rows and the corresponding columns (a square submatrix); 2244 each processor's off-diagonal portion encompasses the remainder of the 2245 local matrix (a rectangular submatrix). 2246 2247 The user can specify preallocated storage for the diagonal part of 2248 the local submatrix with either d_nz or d_nnz (not both). Set 2249 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 2250 memory allocation. 
Likewise, specify preallocated storage for the 2251 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2252 2253 You can call MatGetInfo() to get information on how effective the preallocation was; 2254 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 2255 You can also run with the option -info and look for messages with the string 2256 malloc in them to see if additional memory allocation was needed. 2257 2258 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2259 the figure below we depict these three local rows and all columns (0-11). 2260 2261 .vb 2262 0 1 2 3 4 5 6 7 8 9 10 11 2263 -------------------------- 2264 row 3 |. . . d d d o o o o o o 2265 row 4 |. . . d d d o o o o o o 2266 row 5 |. . . d d d o o o o o o 2267 -------------------------- 2268 .ve 2269 2270 Thus, any entries in the d locations are stored in the d (diagonal) 2271 submatrix, and any entries in the o locations are stored in the 2272 o (off-diagonal) submatrix. Note that the d matrix is stored in 2273 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 2274 2275 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 2276 plus the diagonal part of the d matrix, 2277 and o_nz should indicate the number of block nonzeros per row in the o matrix 2278 2279 In general, for PDE problems in which most nonzeros are near the diagonal, 2280 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2281 or you will get TERRIBLE performance; see the users' manual chapter on 2282 matrices. 2283 2284 Level: intermediate 2285 2286 .seealso: `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `PetscSplitOwnership()` 2287 @*/ 2288 PetscErrorCode MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2289 { 2290 PetscFunctionBegin; 2291 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2292 PetscValidType(B,1); 2293 PetscValidLogicalCollectiveInt(B,bs,2); 2294 PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz)); 2295 PetscFunctionReturn(0); 2296 } 2297 2298 /*@C 2299 MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format 2300 (block compressed row). For good matrix assembly performance 2301 the user should preallocate the matrix storage by setting the parameters 2302 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2303 performance can be increased by more than a factor of 50. 2304 2305 Collective 2306 2307 Input Parameters: 2308 + comm - MPI communicator 2309 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 2310 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 2311 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2312 This value should be the same as the local size used in creating the 2313 y vector for the matrix-vector product y = Ax. 2314 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 2315 This value should be the same as the local size used in creating the 2316 x vector for the matrix-vector product y = Ax. 2317 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2318 . 
N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2319 . d_nz - number of block nonzeros per block row in diagonal portion of local
2320 submatrix (same for all local rows)
2321 . d_nnz - array containing the number of block nonzeros in the various block rows
2322 in the upper triangular portion of the diagonal portion of the local submatrix
2323 (possibly different for each block row) or NULL.
2324 If you plan to factor the matrix you must leave room for the diagonal entry and
2325 set its value even if it is zero.
2326 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local
2327 submatrix (same for all local rows).
2328 - o_nnz - array containing the number of block nonzeros in the various block rows of the
2329 off-diagonal portion of the local submatrix (possibly different for
2330 each block row) or NULL.
2331
2332 Output Parameter:
2333 . A - the matrix
2334
2335 Options Database Keys:
2336 + -mat_no_unroll - uses code that does not unroll the loops in the
2337 block calculations (much slower)
2338 . -mat_block_size - size of the blocks to use
2339 - -mat_mpi - use the parallel matrix data structures even on one processor
2340 (defaults to using SeqSBAIJ format on one processor)
2341
2342 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
2343 MatXXXXSetPreallocation() paradigm instead of this routine directly.
2344 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
2345
2346 Notes:
2347 The number of rows and columns must be divisible by blocksize.
2348 This matrix type does not support complex Hermitian operations.
2349
2350 The user MUST specify either the local or global matrix dimensions
2351 (possibly both).
2352
2353 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2354 then it must be used on all processors that share the object for that argument.
2355
2356 If the *_nnz parameter is given then the *_nz parameter is ignored.
2357
2358 Storage Information:
2359 For a square global matrix we define each processor's diagonal portion
2360 to be its local rows and the corresponding columns (a square submatrix);
2361 each processor's off-diagonal portion encompasses the remainder of the
2362 local matrix (a rectangular submatrix).
2363
2364 The user can specify preallocated storage for the diagonal part of
2365 the local submatrix with either d_nz or d_nnz (not both). Set
2366 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2367 memory allocation. Likewise, specify preallocated storage for the
2368 off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2369
2370 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2371 the figure below we depict these three local rows and all columns (0-11).
2372
2373 .vb
2374 0 1 2 3 4 5 6 7 8 9 10 11
2375 --------------------------
2376 row 3 |. . . d d d o o o o o o
2377 row 4 |. . . d d d o o o o o o
2378 row 5 |. . . d d d o o o o o o
2379 --------------------------
2380 .ve
2381
2382 Thus, any entries in the d locations are stored in the d (diagonal)
2383 submatrix, and any entries in the o locations are stored in the
2384 o (off-diagonal) submatrix. Note that the d matrix is stored in
2385 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
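   As a concrete reading of the figure (a sketch assuming bs=1 and that every depicted d and o location
   is a nonzero): since only the upper triangle of the d block is stored, d_nnz would be {3,2,1} for
   rows 3, 4 and 5, while o_nnz would be {6,6,6} because all six o columns lie to the right of the
   diagonal in every row.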
2386 2387 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 2388 plus the diagonal part of the d matrix, 2389 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2390 In general, for PDE problems in which most nonzeros are near the diagonal, 2391 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2392 or you will get TERRIBLE performance; see the users' manual chapter on 2393 matrices. 2394 2395 Level: intermediate 2396 2397 .seealso: `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()` 2398 @*/ 2399 2400 PetscErrorCode MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2401 { 2402 PetscMPIInt size; 2403 2404 PetscFunctionBegin; 2405 PetscCall(MatCreate(comm,A)); 2406 PetscCall(MatSetSizes(*A,m,n,M,N)); 2407 PetscCallMPI(MPI_Comm_size(comm,&size)); 2408 if (size > 1) { 2409 PetscCall(MatSetType(*A,MATMPISBAIJ)); 2410 PetscCall(MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz)); 2411 } else { 2412 PetscCall(MatSetType(*A,MATSEQSBAIJ)); 2413 PetscCall(MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz)); 2414 } 2415 PetscFunctionReturn(0); 2416 } 2417 2418 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2419 { 2420 Mat mat; 2421 Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data; 2422 PetscInt len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs; 2423 PetscScalar *array; 2424 2425 PetscFunctionBegin; 2426 *newmat = NULL; 2427 2428 PetscCall(MatCreate(PetscObjectComm((PetscObject)matin),&mat)); 2429 PetscCall(MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N)); 2430 PetscCall(MatSetType(mat,((PetscObject)matin)->type_name)); 2431 PetscCall(PetscLayoutReference(matin->rmap,&mat->rmap)); 2432 PetscCall(PetscLayoutReference(matin->cmap,&mat->cmap)); 2433 2434 mat->factortype = matin->factortype; 2435 mat->preallocated = PETSC_TRUE; 2436 mat->assembled = PETSC_TRUE; 2437 mat->insertmode = NOT_SET_VALUES; 2438 2439 a = (Mat_MPISBAIJ*)mat->data; 2440 a->bs2 = oldmat->bs2; 2441 a->mbs = oldmat->mbs; 2442 a->nbs = oldmat->nbs; 2443 a->Mbs = oldmat->Mbs; 2444 a->Nbs = oldmat->Nbs; 2445 2446 a->size = oldmat->size; 2447 a->rank = oldmat->rank; 2448 a->donotstash = oldmat->donotstash; 2449 a->roworiented = oldmat->roworiented; 2450 a->rowindices = NULL; 2451 a->rowvalues = NULL; 2452 a->getrowactive = PETSC_FALSE; 2453 a->barray = NULL; 2454 a->rstartbs = oldmat->rstartbs; 2455 a->rendbs = oldmat->rendbs; 2456 a->cstartbs = oldmat->cstartbs; 2457 a->cendbs = oldmat->cendbs; 2458 2459 /* hash table stuff */ 2460 a->ht = NULL; 2461 a->hd = NULL; 2462 a->ht_size = 0; 2463 a->ht_flag = oldmat->ht_flag; 2464 a->ht_fact = oldmat->ht_fact; 2465 a->ht_total_ct = 0; 2466 a->ht_insert_ct = 0; 2467 2468 PetscCall(PetscArraycpy(a->rangebs,oldmat->rangebs,a->size+2)); 2469 if (oldmat->colmap) { 2470 #if defined(PETSC_USE_CTABLE) 2471 PetscCall(PetscTableCreateCopy(oldmat->colmap,&a->colmap)); 2472 #else 2473 PetscCall(PetscMalloc1(a->Nbs,&a->colmap)); 2474 PetscCall(PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt))); 2475 PetscCall(PetscArraycpy(a->colmap,oldmat->colmap,a->Nbs)); 2476 #endif 2477 } else a->colmap = NULL; 2478 2479 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2480 PetscCall(PetscMalloc1(len,&a->garray)); 2481 
PetscCall(PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt))); 2482 PetscCall(PetscArraycpy(a->garray,oldmat->garray,len)); 2483 } else a->garray = NULL; 2484 2485 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash)); 2486 PetscCall(VecDuplicate(oldmat->lvec,&a->lvec)); 2487 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec)); 2488 PetscCall(VecScatterCopy(oldmat->Mvctx,&a->Mvctx)); 2489 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx)); 2490 2491 PetscCall(VecDuplicate(oldmat->slvec0,&a->slvec0)); 2492 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0)); 2493 PetscCall(VecDuplicate(oldmat->slvec1,&a->slvec1)); 2494 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1)); 2495 2496 PetscCall(VecGetLocalSize(a->slvec1,&nt)); 2497 PetscCall(VecGetArray(a->slvec1,&array)); 2498 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a)); 2499 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b)); 2500 PetscCall(VecRestoreArray(a->slvec1,&array)); 2501 PetscCall(VecGetArray(a->slvec0,&array)); 2502 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b)); 2503 PetscCall(VecRestoreArray(a->slvec0,&array)); 2504 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0)); 2505 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1)); 2506 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b)); 2507 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a)); 2508 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b)); 2509 2510 /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */ 2511 PetscCall(PetscObjectReference((PetscObject)oldmat->sMvctx)); 2512 a->sMvctx = oldmat->sMvctx; 2513 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx)); 2514 2515 PetscCall(MatDuplicate(oldmat->A,cpvalues,&a->A)); 2516 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A)); 2517 PetscCall(MatDuplicate(oldmat->B,cpvalues,&a->B)); 2518 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B)); 2519 PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist)); 2520 *newmat = mat; 2521 PetscFunctionReturn(0); 2522 } 2523 2524 /* Used for both MPIBAIJ and MPISBAIJ matrices */ 2525 #define MatLoad_MPISBAIJ_Binary MatLoad_MPIBAIJ_Binary 2526 2527 PetscErrorCode MatLoad_MPISBAIJ(Mat mat,PetscViewer viewer) 2528 { 2529 PetscBool isbinary; 2530 2531 PetscFunctionBegin; 2532 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary)); 2533 PetscCheck(isbinary,PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)mat)->type_name); 2534 PetscCall(MatLoad_MPISBAIJ_Binary(mat,viewer)); 2535 PetscFunctionReturn(0); 2536 } 2537 2538 /*XXXXX@ 2539 MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2540 2541 Input Parameters: 2542 . mat - the matrix 2543 . 
fact - factor 2544 2545 Not Collective on Mat, each process can have a different hash factor 2546 2547 Level: advanced 2548 2549 Notes: 2550 This can also be set by the command line option: -mat_use_hash_table fact 2551 2552 .seealso: `MatSetOption()` 2553 @XXXXX*/ 2554 2555 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[]) 2556 { 2557 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 2558 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(a->B)->data; 2559 PetscReal atmp; 2560 PetscReal *work,*svalues,*rvalues; 2561 PetscInt i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol; 2562 PetscMPIInt rank,size; 2563 PetscInt *rowners_bs,dest,count,source; 2564 PetscScalar *va; 2565 MatScalar *ba; 2566 MPI_Status stat; 2567 2568 PetscFunctionBegin; 2569 PetscCheck(!idx,PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov"); 2570 PetscCall(MatGetRowMaxAbs(a->A,v,NULL)); 2571 PetscCall(VecGetArray(v,&va)); 2572 2573 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size)); 2574 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank)); 2575 2576 bs = A->rmap->bs; 2577 mbs = a->mbs; 2578 Mbs = a->Mbs; 2579 ba = b->a; 2580 bi = b->i; 2581 bj = b->j; 2582 2583 /* find ownerships */ 2584 rowners_bs = A->rmap->range; 2585 2586 /* each proc creates an array to be distributed */ 2587 PetscCall(PetscCalloc1(bs*Mbs,&work)); 2588 2589 /* row_max for B */ 2590 if (rank != size-1) { 2591 for (i=0; i<mbs; i++) { 2592 ncols = bi[1] - bi[0]; bi++; 2593 brow = bs*i; 2594 for (j=0; j<ncols; j++) { 2595 bcol = bs*(*bj); 2596 for (kcol=0; kcol<bs; kcol++) { 2597 col = bcol + kcol; /* local col index */ 2598 col += rowners_bs[rank+1]; /* global col index */ 2599 for (krow=0; krow<bs; krow++) { 2600 atmp = PetscAbsScalar(*ba); ba++; 2601 row = brow + krow; /* local row index */ 2602 if (PetscRealPart(va[row]) < atmp) va[row] = atmp; 2603 if (work[col] < atmp) work[col] = atmp; 2604 } 2605 } 2606 bj++; 2607 } 2608 } 2609 2610 /* send values to its owners */ 2611 for (dest=rank+1; dest<size; dest++) { 2612 svalues = work + rowners_bs[dest]; 2613 count = rowners_bs[dest+1]-rowners_bs[dest]; 2614 PetscCallMPI(MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A))); 2615 } 2616 } 2617 2618 /* receive values */ 2619 if (rank) { 2620 rvalues = work; 2621 count = rowners_bs[rank+1]-rowners_bs[rank]; 2622 for (source=0; source<rank; source++) { 2623 PetscCallMPI(MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat)); 2624 /* process values */ 2625 for (i=0; i<count; i++) { 2626 if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i]; 2627 } 2628 } 2629 } 2630 2631 PetscCall(VecRestoreArray(v,&va)); 2632 PetscCall(PetscFree(work)); 2633 PetscFunctionReturn(0); 2634 } 2635 2636 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2637 { 2638 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2639 PetscInt mbs=mat->mbs,bs=matin->rmap->bs; 2640 PetscScalar *x,*ptr,*from; 2641 Vec bb1; 2642 const PetscScalar *b; 2643 2644 PetscFunctionBegin; 2645 PetscCheck(its > 0 && lits > 0,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive",its,lits); 2646 PetscCheck(bs <= 1,PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2647 2648 if (flag == SOR_APPLY_UPPER) { 2649 PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx)); 2650 
PetscFunctionReturn(0); 2651 } 2652 2653 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2654 if (flag & SOR_ZERO_INITIAL_GUESS) { 2655 PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx)); 2656 its--; 2657 } 2658 2659 PetscCall(VecDuplicate(bb,&bb1)); 2660 while (its--) { 2661 2662 /* lower triangular part: slvec0b = - B^T*xx */ 2663 PetscCall((*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b)); 2664 2665 /* copy xx into slvec0a */ 2666 PetscCall(VecGetArray(mat->slvec0,&ptr)); 2667 PetscCall(VecGetArray(xx,&x)); 2668 PetscCall(PetscArraycpy(ptr,x,bs*mbs)); 2669 PetscCall(VecRestoreArray(mat->slvec0,&ptr)); 2670 2671 PetscCall(VecScale(mat->slvec0,-1.0)); 2672 2673 /* copy bb into slvec1a */ 2674 PetscCall(VecGetArray(mat->slvec1,&ptr)); 2675 PetscCall(VecGetArrayRead(bb,&b)); 2676 PetscCall(PetscArraycpy(ptr,b,bs*mbs)); 2677 PetscCall(VecRestoreArray(mat->slvec1,&ptr)); 2678 2679 /* set slvec1b = 0 */ 2680 PetscCall(VecSet(mat->slvec1b,0.0)); 2681 2682 PetscCall(VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD)); 2683 PetscCall(VecRestoreArray(xx,&x)); 2684 PetscCall(VecRestoreArrayRead(bb,&b)); 2685 PetscCall(VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD)); 2686 2687 /* upper triangular part: bb1 = bb1 - B*x */ 2688 PetscCall((*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1)); 2689 2690 /* local diagonal sweep */ 2691 PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx)); 2692 } 2693 PetscCall(VecDestroy(&bb1)); 2694 } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 2695 PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx)); 2696 } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 2697 PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx)); 2698 } else if (flag & SOR_EISENSTAT) { 2699 Vec xx1; 2700 PetscBool hasop; 2701 const PetscScalar *diag; 2702 PetscScalar *sl,scale = (omega - 2.0)/omega; 2703 PetscInt i,n; 2704 2705 if (!mat->xx1) { 2706 PetscCall(VecDuplicate(bb,&mat->xx1)); 2707 PetscCall(VecDuplicate(bb,&mat->bb1)); 2708 } 2709 xx1 = mat->xx1; 2710 bb1 = mat->bb1; 2711 2712 PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx)); 2713 2714 if (!mat->diag) { 2715 /* this is wrong for same matrix with new nonzero values */ 2716 PetscCall(MatCreateVecs(matin,&mat->diag,NULL)); 2717 PetscCall(MatGetDiagonal(matin,mat->diag)); 2718 } 2719 PetscCall(MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop)); 2720 2721 if (hasop) { 2722 PetscCall(MatMultDiagonalBlock(matin,xx,bb1)); 2723 PetscCall(VecAYPX(mat->slvec1a,scale,bb)); 2724 } else { 2725 /* 2726 These two lines are replaced by code that may be a bit faster for a good compiler 2727 PetscCall(VecPointwiseMult(mat->slvec1a,mat->diag,xx)); 2728 PetscCall(VecAYPX(mat->slvec1a,scale,bb)); 2729 */ 2730 PetscCall(VecGetArray(mat->slvec1a,&sl)); 2731 PetscCall(VecGetArrayRead(mat->diag,&diag)); 2732 PetscCall(VecGetArrayRead(bb,&b)); 2733 PetscCall(VecGetArray(xx,&x)); 2734 PetscCall(VecGetLocalSize(xx,&n)); 2735 if (omega == 1.0) { 2736 for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i]; 2737 PetscCall(PetscLogFlops(2.0*n)); 2738 } else { 2739 for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i]; 2740 PetscCall(PetscLogFlops(3.0*n)); 2741 } 2742 PetscCall(VecRestoreArray(mat->slvec1a,&sl)); 
2743 PetscCall(VecRestoreArrayRead(mat->diag,&diag));
2744 PetscCall(VecRestoreArrayRead(bb,&b));
2745 PetscCall(VecRestoreArray(xx,&x));
2746 }
2747
2748 /* multiply off-diagonal portion of matrix */
2749 PetscCall(VecSet(mat->slvec1b,0.0));
2750 PetscCall((*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b));
2751 PetscCall(VecGetArray(mat->slvec0,&from));
2752 PetscCall(VecGetArray(xx,&x));
2753 PetscCall(PetscArraycpy(from,x,bs*mbs));
2754 PetscCall(VecRestoreArray(mat->slvec0,&from));
2755 PetscCall(VecRestoreArray(xx,&x));
2756 PetscCall(VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD));
2757 PetscCall(VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD));
2758 PetscCall((*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a));
2759
2760 /* local sweep */
2761 PetscCall((*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1));
2762 PetscCall(VecAXPY(xx,1.0,xx1));
2763 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2764 PetscFunctionReturn(0);
2765 }
2766
2767 /*@
2768 MatCreateMPISBAIJWithArrays - creates an MPI SBAIJ matrix using arrays that contain the local rows in standard
2769 CSR format.
2770
2771 Collective
2772
2773 Input Parameters:
2774 + comm - MPI communicator
2775 . bs - the block size, only a block size of 1 is supported
2776 . m - number of local rows (Cannot be PETSC_DECIDE)
2777 . n - This value should be the same as the local size used in creating the
2778 x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
2779 calculated if N is given). For square matrices n is almost always m.
2780 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2781 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2782 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in block row row-1 of the matrix
2783 . j - column indices
2784 - a - matrix values
2785
2786 Output Parameter:
2787 . mat - the matrix
2788
2789 Level: intermediate
2790
2791 Notes:
2792 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
2793 thus you CANNOT change the matrix entries by changing the values of a[] after you have
2794 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
2795
2796 The i and j indices are 0 based, and the i indices are offsets into the local j array.
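   Example (a sketch for bs = 1 on a single process; only the upper triangle is passed):
.vb
       1 2 0
   A = 2 3 4   corresponds to   i = {0,2,4,5}, j = {0,1,1,2,2}, a = {1,2,3,4,5}
       0 4 5
.ve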
2797
2798 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
2799 `MPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`
2800 @*/
2801 PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
2802 {
2803 PetscFunctionBegin;
2804 PetscCheck(!i[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
2805 PetscCheck(m >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
2806 PetscCall(MatCreate(comm,mat));
2807 PetscCall(MatSetSizes(*mat,m,n,M,N));
2808 PetscCall(MatSetType(*mat,MATMPISBAIJ));
2809 PetscCall(MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a));
2810 PetscFunctionReturn(0);
2811 }
2812
2813 /*@C
2814 MatMPISBAIJSetPreallocationCSR - Creates a sparse parallel matrix in SBAIJ format using the given nonzero structure and (optional) numerical values
2815
2816 Collective
2817
2818 Input Parameters:
2819 + B - the matrix
2820 . bs - the block size
2821 . i - the indices into j for the start of each local row (starts with zero)
2822 . j - the column indices for each local row (starts with zero); these must be sorted within each row
2823 - v - optional values in the matrix
2824
2825 Level: advanced
2826
2827 Notes:
2828 Though this routine has Preallocation() in the name, it also sets the exact nonzero locations of the matrix entries
2829 and usually the numerical values as well.
2830
2831 Any entries below the diagonal are ignored.
2832
2833 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MPIAIJ`
2834 @*/
2835 PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2836 {
2837 PetscFunctionBegin;
2838 PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));
2839 PetscFunctionReturn(0);
2840 }
2841
2842 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
2843 {
2844 PetscInt m,N,i,rstart,nnz,Ii,bs,cbs;
2845 PetscInt *indx;
2846 PetscScalar *values;
2847
2848 PetscFunctionBegin;
2849 PetscCall(MatGetSize(inmat,&m,&N));
2850 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
2851 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inmat->data;
2852 PetscInt *dnz,*onz,mbs,Nbs,nbs;
2853 PetscInt *bindx,rmax=a->rmax,j;
2854 PetscMPIInt rank,size;
2855
2856 PetscCall(MatGetBlockSizes(inmat,&bs,&cbs));
2857 mbs = m/bs; Nbs = N/cbs;
2858 if (n == PETSC_DECIDE) {
2859 PetscCall(PetscSplitOwnershipBlock(comm,cbs,&n,&N));
2860 }
2861 nbs = n/cbs;
2862
2863 PetscCall(PetscMalloc1(rmax,&bindx));
2864 MatPreallocateBegin(comm,mbs,nbs,dnz,onz); /* inline function, output __end and __rstart are used below */
2865
2866 PetscCallMPI(MPI_Comm_rank(comm,&rank));
2867 PetscCallMPI(MPI_Comm_size(comm,&size));
2868 if (rank == size-1) {
2869 /* Check sum(nbs) = Nbs */
2870 PetscCheck(__end == Nbs,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT,__end,Nbs);
2871 }
2872
2873 rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */
2874 PetscCall(MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE));
2875 for (i=0; i<mbs; i++) {
2876
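/* translate the point column indices of block row i into block column indices before counting the block nonzeros */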
PetscCall(MatGetRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL)); /* non-blocked nnz and indx */ 2877 nnz = nnz/bs; 2878 for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs; 2879 PetscCall(MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz)); 2880 PetscCall(MatRestoreRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL)); 2881 } 2882 PetscCall(MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE)); 2883 PetscCall(PetscFree(bindx)); 2884 2885 PetscCall(MatCreate(comm,outmat)); 2886 PetscCall(MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE)); 2887 PetscCall(MatSetBlockSizes(*outmat,bs,cbs)); 2888 PetscCall(MatSetType(*outmat,MATSBAIJ)); 2889 PetscCall(MatSeqSBAIJSetPreallocation(*outmat,bs,0,dnz)); 2890 PetscCall(MatMPISBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz)); 2891 MatPreallocateEnd(dnz,onz); 2892 } 2893 2894 /* numeric phase */ 2895 PetscCall(MatGetBlockSizes(inmat,&bs,&cbs)); 2896 PetscCall(MatGetOwnershipRange(*outmat,&rstart,NULL)); 2897 2898 PetscCall(MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE)); 2899 for (i=0; i<m; i++) { 2900 PetscCall(MatGetRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values)); 2901 Ii = i + rstart; 2902 PetscCall(MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES)); 2903 PetscCall(MatRestoreRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values)); 2904 } 2905 PetscCall(MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE)); 2906 PetscCall(MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY)); 2907 PetscCall(MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY)); 2908 PetscFunctionReturn(0); 2909 } 2910
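/*
   A minimal usage sketch for the CSR-based constructor above (illustrative only, not part of PETSc;
   meant to be run on a single MPI process, and the arrays ii, jj, vv below are hypothetical):

     Mat               A;
     const PetscInt    ii[] = {0,2,3};        // row starts; ii[0] must be 0
     const PetscInt    jj[] = {0,1,1};        // sorted, upper triangle only
     const PetscScalar vv[] = {4.0,-1.0,4.0}; // represents A = [4 -1; -1 4]

     PetscCall(MatCreateMPISBAIJWithArrays(PETSC_COMM_WORLD,1,2,2,PETSC_DETERMINE,PETSC_DETERMINE,ii,jj,vv,&A));
     PetscCall(MatView(A,PETSC_VIEWER_STDOUT_WORLD));
     PetscCall(MatDestroy(&A));
*/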