1 2 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/ 3 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 4 #include <../src/mat/impls/sbaij/seq/sbaij.h> 5 #include <petscblaslapack.h> 6 7 #if defined(PETSC_HAVE_ELEMENTAL) 8 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 9 #endif 10 #if defined(PETSC_HAVE_SCALAPACK) 11 PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*); 12 #endif 13 14 /* This could be moved to matimpl.h */ 15 static PetscErrorCode MatPreallocateWithMats_Private(Mat B, PetscInt nm, Mat X[], PetscBool symm[], PetscBool fill) 16 { 17 Mat preallocator; 18 PetscInt r,rstart,rend; 19 PetscInt bs,i,m,n,M,N; 20 PetscBool cong = PETSC_TRUE; 21 22 PetscFunctionBegin; 23 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 24 PetscValidLogicalCollectiveInt(B,nm,2); 25 for (i = 0; i < nm; i++) { 26 PetscValidHeaderSpecific(X[i],MAT_CLASSID,3); 27 PetscCall(PetscLayoutCompare(B->rmap,X[i]->rmap,&cong)); 28 PetscCheck(cong,PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"Not for different layouts"); 29 } 30 PetscValidLogicalCollectiveBool(B,fill,5); 31 PetscCall(MatGetBlockSize(B,&bs)); 32 PetscCall(MatGetSize(B,&M,&N)); 33 PetscCall(MatGetLocalSize(B,&m,&n)); 34 PetscCall(MatCreate(PetscObjectComm((PetscObject)B),&preallocator)); 35 PetscCall(MatSetType(preallocator,MATPREALLOCATOR)); 36 PetscCall(MatSetBlockSize(preallocator,bs)); 37 PetscCall(MatSetSizes(preallocator,m,n,M,N)); 38 PetscCall(MatSetUp(preallocator)); 39 PetscCall(MatGetOwnershipRange(preallocator,&rstart,&rend)); 40 for (r = rstart; r < rend; ++r) { 41 PetscInt ncols; 42 const PetscInt *row; 43 const PetscScalar *vals; 44 45 for (i = 0; i < nm; i++) { 46 PetscCall(MatGetRow(X[i],r,&ncols,&row,&vals)); 47 PetscCall(MatSetValues(preallocator,1,&r,ncols,row,vals,INSERT_VALUES)); 48 if (symm && symm[i]) { 49 PetscCall(MatSetValues(preallocator,ncols,row,1,&r,vals,INSERT_VALUES)); 50 } 51 PetscCall(MatRestoreRow(X[i],r,&ncols,&row,&vals)); 52 } 53 } 54 PetscCall(MatAssemblyBegin(preallocator,MAT_FINAL_ASSEMBLY)); 55 PetscCall(MatAssemblyEnd(preallocator,MAT_FINAL_ASSEMBLY)); 56 PetscCall(MatPreallocatorPreallocate(preallocator,fill,B)); 57 PetscCall(MatDestroy(&preallocator)); 58 PetscFunctionReturn(0); 59 } 60 61 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Basic(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 62 { 63 Mat B; 64 PetscInt r; 65 66 PetscFunctionBegin; 67 if (reuse != MAT_REUSE_MATRIX) { 68 PetscBool symm = PETSC_TRUE,isdense; 69 PetscInt bs; 70 71 PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B)); 72 PetscCall(MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N)); 73 PetscCall(MatSetType(B,newtype)); 74 PetscCall(MatGetBlockSize(A,&bs)); 75 PetscCall(MatSetBlockSize(B,bs)); 76 PetscCall(PetscLayoutSetUp(B->rmap)); 77 PetscCall(PetscLayoutSetUp(B->cmap)); 78 PetscCall(PetscObjectTypeCompareAny((PetscObject)B,&isdense,MATSEQDENSE,MATMPIDENSE,MATSEQDENSECUDA,"")); 79 if (!isdense) { 80 PetscCall(MatGetRowUpperTriangular(A)); 81 PetscCall(MatPreallocateWithMats_Private(B,1,&A,&symm,PETSC_TRUE)); 82 PetscCall(MatRestoreRowUpperTriangular(A)); 83 } else { 84 PetscCall(MatSetUp(B)); 85 } 86 } else { 87 B = *newmat; 88 PetscCall(MatZeroEntries(B)); 89 } 90 91 PetscCall(MatGetRowUpperTriangular(A)); 92 for (r = A->rmap->rstart; r < A->rmap->rend; r++) { 93 PetscInt ncols; 94 const PetscInt *row; 95 const PetscScalar *vals; 96 97 PetscCall(MatGetRow(A,r,&ncols,&row,&vals)); 98 
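    /* Row r of the SBAIJ matrix carries only its upper-triangular entries; insert them into B as-is
       below, then mirror them into column r (conjugated in the Hermitian complex case) so that B
       receives the full symmetric pattern. */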
PetscCall(MatSetValues(B,1,&r,ncols,row,vals,INSERT_VALUES)); 99 #if defined(PETSC_USE_COMPLEX) 100 if (A->hermitian == PETSC_BOOL3_TRUE) { 101 PetscInt i; 102 for (i = 0; i < ncols; i++) { 103 PetscCall(MatSetValue(B,row[i],r,PetscConj(vals[i]),INSERT_VALUES)); 104 } 105 } else { 106 PetscCall(MatSetValues(B,ncols,row,1,&r,vals,INSERT_VALUES)); 107 } 108 #else 109 PetscCall(MatSetValues(B,ncols,row,1,&r,vals,INSERT_VALUES)); 110 #endif 111 PetscCall(MatRestoreRow(A,r,&ncols,&row,&vals)); 112 } 113 PetscCall(MatRestoreRowUpperTriangular(A)); 114 PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY)); 115 PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY)); 116 117 if (reuse == MAT_INPLACE_MATRIX) { 118 PetscCall(MatHeaderReplace(A,&B)); 119 } else { 120 *newmat = B; 121 } 122 PetscFunctionReturn(0); 123 } 124 125 PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat) 126 { 127 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ*)mat->data; 128 129 PetscFunctionBegin; 130 PetscCall(MatStoreValues(aij->A)); 131 PetscCall(MatStoreValues(aij->B)); 132 PetscFunctionReturn(0); 133 } 134 135 PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat) 136 { 137 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ*)mat->data; 138 139 PetscFunctionBegin; 140 PetscCall(MatRetrieveValues(aij->A)); 141 PetscCall(MatRetrieveValues(aij->B)); 142 PetscFunctionReturn(0); 143 } 144 145 #define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,orow,ocol) \ 146 { \ 147 brow = row/bs; \ 148 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 149 rmax = aimax[brow]; nrow = ailen[brow]; \ 150 bcol = col/bs; \ 151 ridx = row % bs; cidx = col % bs; \ 152 low = 0; high = nrow; \ 153 while (high-low > 3) { \ 154 t = (low+high)/2; \ 155 if (rp[t] > bcol) high = t; \ 156 else low = t; \ 157 } \ 158 for (_i=low; _i<high; _i++) { \ 159 if (rp[_i] > bcol) break; \ 160 if (rp[_i] == bcol) { \ 161 bap = ap + bs2*_i + bs*cidx + ridx; \ 162 if (addv == ADD_VALUES) *bap += value; \ 163 else *bap = value; \ 164 goto a_noinsert; \ 165 } \ 166 } \ 167 if (a->nonew == 1) goto a_noinsert; \ 168 PetscCheck(a->nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 169 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 170 N = nrow++ - 1; \ 171 /* shift up all the later entries in this row */ \ 172 PetscCall(PetscArraymove(rp+_i+1,rp+_i,N-_i+1)); \ 173 PetscCall(PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1))); \ 174 PetscCall(PetscArrayzero(ap+bs2*_i,bs2)); \ 175 rp[_i] = bcol; \ 176 ap[bs2*_i + bs*cidx + ridx] = value; \ 177 A->nonzerostate++;\ 178 a_noinsert:; \ 179 ailen[brow] = nrow; \ 180 } 181 182 #define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,orow,ocol) \ 183 { \ 184 brow = row/bs; \ 185 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 186 rmax = bimax[brow]; nrow = bilen[brow]; \ 187 bcol = col/bs; \ 188 ridx = row % bs; cidx = col % bs; \ 189 low = 0; high = nrow; \ 190 while (high-low > 3) { \ 191 t = (low+high)/2; \ 192 if (rp[t] > bcol) high = t; \ 193 else low = t; \ 194 } \ 195 for (_i=low; _i<high; _i++) { \ 196 if (rp[_i] > bcol) break; \ 197 if (rp[_i] == bcol) { \ 198 bap = ap + bs2*_i + bs*cidx + ridx; \ 199 if (addv == ADD_VALUES) *bap += value; \ 200 else *bap = value; \ 201 goto b_noinsert; \ 202 } \ 203 } \ 204 if (b->nonew == 1) goto b_noinsert; \ 205 PetscCheck(b->nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") 
into matrix", orow, ocol); \ 206 MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 207 N = nrow++ - 1; \ 208 /* shift up all the later entries in this row */ \ 209 PetscCall(PetscArraymove(rp+_i+1,rp+_i,N-_i+1)); \ 210 PetscCall(PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1))); \ 211 PetscCall(PetscArrayzero(ap+bs2*_i,bs2)); \ 212 rp[_i] = bcol; \ 213 ap[bs2*_i + bs*cidx + ridx] = value; \ 214 B->nonzerostate++;\ 215 b_noinsert:; \ 216 bilen[brow] = nrow; \ 217 } 218 219 /* Only add/insert a(i,j) with i<=j (blocks). 220 Any a(i,j) with i>j input by user is ingored or generates an error 221 */ 222 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 223 { 224 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 225 MatScalar value; 226 PetscBool roworiented = baij->roworiented; 227 PetscInt i,j,row,col; 228 PetscInt rstart_orig=mat->rmap->rstart; 229 PetscInt rend_orig =mat->rmap->rend,cstart_orig=mat->cmap->rstart; 230 PetscInt cend_orig =mat->cmap->rend,bs=mat->rmap->bs; 231 232 /* Some Variables required in the macro */ 233 Mat A = baij->A; 234 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data; 235 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 236 MatScalar *aa =a->a; 237 238 Mat B = baij->B; 239 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 240 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 241 MatScalar *ba =b->a; 242 243 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 244 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 245 MatScalar *ap,*bap; 246 247 /* for stash */ 248 PetscInt n_loc, *in_loc = NULL; 249 MatScalar *v_loc = NULL; 250 251 PetscFunctionBegin; 252 if (!baij->donotstash) { 253 if (n > baij->n_loc) { 254 PetscCall(PetscFree(baij->in_loc)); 255 PetscCall(PetscFree(baij->v_loc)); 256 PetscCall(PetscMalloc1(n,&baij->in_loc)); 257 PetscCall(PetscMalloc1(n,&baij->v_loc)); 258 259 baij->n_loc = n; 260 } 261 in_loc = baij->in_loc; 262 v_loc = baij->v_loc; 263 } 264 265 for (i=0; i<m; i++) { 266 if (im[i] < 0) continue; 267 PetscCheck(im[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1); 268 if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */ 269 row = im[i] - rstart_orig; /* local row index */ 270 for (j=0; j<n; j++) { 271 if (im[i]/bs > in[j]/bs) { 272 if (a->ignore_ltriangular) { 273 continue; /* ignore lower triangular blocks */ 274 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. 
Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 275 } 276 if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */ 277 col = in[j] - cstart_orig; /* local col index */ 278 brow = row/bs; bcol = col/bs; 279 if (brow > bcol) continue; /* ignore lower triangular blocks of A */ 280 if (roworiented) value = v[i*n+j]; 281 else value = v[i+j*m]; 282 MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,im[i],in[j]); 283 /* PetscCall(MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv)); */ 284 } else if (in[j] < 0) { 285 continue; 286 } else { 287 PetscCheck(in[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[j],mat->cmap->N-1); 288 /* off-diag entry (B) */ 289 if (mat->was_assembled) { 290 if (!baij->colmap) { 291 PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 292 } 293 #if defined(PETSC_USE_CTABLE) 294 PetscCall(PetscTableFind(baij->colmap,in[j]/bs + 1,&col)); 295 col = col - 1; 296 #else 297 col = baij->colmap[in[j]/bs] - 1; 298 #endif 299 if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) { 300 PetscCall(MatDisAssemble_MPISBAIJ(mat)); 301 col = in[j]; 302 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 303 B = baij->B; 304 b = (Mat_SeqBAIJ*)(B)->data; 305 bimax= b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 306 ba = b->a; 307 } else col += in[j]%bs; 308 } else col = in[j]; 309 if (roworiented) value = v[i*n+j]; 310 else value = v[i+j*m]; 311 MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,im[i],in[j]); 312 /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */ 313 } 314 } 315 } else { /* off processor entry */ 316 PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 317 if (!baij->donotstash) { 318 mat->assembled = PETSC_FALSE; 319 n_loc = 0; 320 for (j=0; j<n; j++) { 321 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 322 in_loc[n_loc] = in[j]; 323 if (roworiented) { 324 v_loc[n_loc] = v[i*n+j]; 325 } else { 326 v_loc[n_loc] = v[j*m+i]; 327 } 328 n_loc++; 329 } 330 PetscCall(MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE)); 331 } 332 } 333 } 334 PetscFunctionReturn(0); 335 } 336 337 static inline PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol) 338 { 339 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 340 PetscInt *rp,low,high,t,ii,jj,nrow,i,rmax,N; 341 PetscInt *imax =a->imax,*ai=a->i,*ailen=a->ilen; 342 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs; 343 PetscBool roworiented=a->roworiented; 344 const PetscScalar *value = v; 345 MatScalar *ap,*aa = a->a,*bap; 346 347 PetscFunctionBegin; 348 if (col < row) { 349 if (a->ignore_ltriangular) PetscFunctionReturn(0); /* ignore lower triangular block */ 350 else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. 
Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 351 } 352 rp = aj + ai[row]; 353 ap = aa + bs2*ai[row]; 354 rmax = imax[row]; 355 nrow = ailen[row]; 356 value = v; 357 low = 0; 358 high = nrow; 359 360 while (high-low > 7) { 361 t = (low+high)/2; 362 if (rp[t] > col) high = t; 363 else low = t; 364 } 365 for (i=low; i<high; i++) { 366 if (rp[i] > col) break; 367 if (rp[i] == col) { 368 bap = ap + bs2*i; 369 if (roworiented) { 370 if (is == ADD_VALUES) { 371 for (ii=0; ii<bs; ii++) { 372 for (jj=ii; jj<bs2; jj+=bs) { 373 bap[jj] += *value++; 374 } 375 } 376 } else { 377 for (ii=0; ii<bs; ii++) { 378 for (jj=ii; jj<bs2; jj+=bs) { 379 bap[jj] = *value++; 380 } 381 } 382 } 383 } else { 384 if (is == ADD_VALUES) { 385 for (ii=0; ii<bs; ii++) { 386 for (jj=0; jj<bs; jj++) { 387 *bap++ += *value++; 388 } 389 } 390 } else { 391 for (ii=0; ii<bs; ii++) { 392 for (jj=0; jj<bs; jj++) { 393 *bap++ = *value++; 394 } 395 } 396 } 397 } 398 goto noinsert2; 399 } 400 } 401 if (nonew == 1) goto noinsert2; 402 PetscCheck(nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new block index nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol); 403 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 404 N = nrow++ - 1; high++; 405 /* shift up all the later entries in this row */ 406 PetscCall(PetscArraymove(rp+i+1,rp+i,N-i+1)); 407 PetscCall(PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1))); 408 rp[i] = col; 409 bap = ap + bs2*i; 410 if (roworiented) { 411 for (ii=0; ii<bs; ii++) { 412 for (jj=ii; jj<bs2; jj+=bs) { 413 bap[jj] = *value++; 414 } 415 } 416 } else { 417 for (ii=0; ii<bs; ii++) { 418 for (jj=0; jj<bs; jj++) { 419 *bap++ = *value++; 420 } 421 } 422 } 423 noinsert2:; 424 ailen[row] = nrow; 425 PetscFunctionReturn(0); 426 } 427 428 /* 429 This routine is exactly duplicated in mpibaij.c 430 */ 431 static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol) 432 { 433 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 434 PetscInt *rp,low,high,t,ii,jj,nrow,i,rmax,N; 435 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 436 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs; 437 PetscBool roworiented=a->roworiented; 438 const PetscScalar *value = v; 439 MatScalar *ap,*aa = a->a,*bap; 440 441 PetscFunctionBegin; 442 rp = aj + ai[row]; 443 ap = aa + bs2*ai[row]; 444 rmax = imax[row]; 445 nrow = ailen[row]; 446 low = 0; 447 high = nrow; 448 value = v; 449 while (high-low > 7) { 450 t = (low+high)/2; 451 if (rp[t] > col) high = t; 452 else low = t; 453 } 454 for (i=low; i<high; i++) { 455 if (rp[i] > col) break; 456 if (rp[i] == col) { 457 bap = ap + bs2*i; 458 if (roworiented) { 459 if (is == ADD_VALUES) { 460 for (ii=0; ii<bs; ii++) { 461 for (jj=ii; jj<bs2; jj+=bs) { 462 bap[jj] += *value++; 463 } 464 } 465 } else { 466 for (ii=0; ii<bs; ii++) { 467 for (jj=ii; jj<bs2; jj+=bs) { 468 bap[jj] = *value++; 469 } 470 } 471 } 472 } else { 473 if (is == ADD_VALUES) { 474 for (ii=0; ii<bs; ii++,value+=bs) { 475 for (jj=0; jj<bs; jj++) { 476 bap[jj] += value[jj]; 477 } 478 bap += bs; 479 } 480 } else { 481 for (ii=0; ii<bs; ii++,value+=bs) { 482 for (jj=0; jj<bs; jj++) { 483 bap[jj] = value[jj]; 484 } 485 bap += bs; 486 } 487 } 488 } 489 goto noinsert2; 490 } 491 } 492 if (nonew == 1) goto noinsert2; 493 PetscCheck(nonew != 
-1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol); 494 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 495 N = nrow++ - 1; high++; 496 /* shift up all the later entries in this row */ 497 PetscCall(PetscArraymove(rp+i+1,rp+i,N-i+1)); 498 PetscCall(PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1))); 499 rp[i] = col; 500 bap = ap + bs2*i; 501 if (roworiented) { 502 for (ii=0; ii<bs; ii++) { 503 for (jj=ii; jj<bs2; jj+=bs) { 504 bap[jj] = *value++; 505 } 506 } 507 } else { 508 for (ii=0; ii<bs; ii++) { 509 for (jj=0; jj<bs; jj++) { 510 *bap++ = *value++; 511 } 512 } 513 } 514 noinsert2:; 515 ailen[row] = nrow; 516 PetscFunctionReturn(0); 517 } 518 519 /* 520 This routine could be optimized by removing the need for the block copy below and passing stride information 521 to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ() 522 */ 523 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 524 { 525 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 526 const MatScalar *value; 527 MatScalar *barray =baij->barray; 528 PetscBool roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular; 529 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 530 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 531 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 532 533 PetscFunctionBegin; 534 if (!barray) { 535 PetscCall(PetscMalloc1(bs2,&barray)); 536 baij->barray = barray; 537 } 538 539 if (roworiented) { 540 stepval = (n-1)*bs; 541 } else { 542 stepval = (m-1)*bs; 543 } 544 for (i=0; i<m; i++) { 545 if (im[i] < 0) continue; 546 PetscCheck(im[i] < baij->Mbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT,im[i],baij->Mbs-1); 547 if (im[i] >= rstart && im[i] < rend) { 548 row = im[i] - rstart; 549 for (j=0; j<n; j++) { 550 if (im[i] > in[j]) { 551 if (ignore_ltriangular) continue; /* ignore lower triangular blocks */ 552 else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. 
Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 553 } 554 /* If NumCol = 1 then a copy is not required */ 555 if ((roworiented) && (n == 1)) { 556 barray = (MatScalar*) v + i*bs2; 557 } else if ((!roworiented) && (m == 1)) { 558 barray = (MatScalar*) v + j*bs2; 559 } else { /* Here a copy is required */ 560 if (roworiented) { 561 value = v + i*(stepval+bs)*bs + j*bs; 562 } else { 563 value = v + j*(stepval+bs)*bs + i*bs; 564 } 565 for (ii=0; ii<bs; ii++,value+=stepval) { 566 for (jj=0; jj<bs; jj++) { 567 *barray++ = *value++; 568 } 569 } 570 barray -=bs2; 571 } 572 573 if (in[j] >= cstart && in[j] < cend) { 574 col = in[j] - cstart; 575 PetscCall(MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j])); 576 } else if (in[j] < 0) { 577 continue; 578 } else { 579 PetscCheck(in[j] < baij->Nbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT,in[j],baij->Nbs-1); 580 if (mat->was_assembled) { 581 if (!baij->colmap) { 582 PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 583 } 584 585 #if defined(PETSC_USE_DEBUG) 586 #if defined(PETSC_USE_CTABLE) 587 { PetscInt data; 588 PetscCall(PetscTableFind(baij->colmap,in[j]+1,&data)); 589 PetscCheck((data - 1) % bs == 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 590 } 591 #else 592 PetscCheck((baij->colmap[in[j]] - 1) % bs == 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 593 #endif 594 #endif 595 #if defined(PETSC_USE_CTABLE) 596 PetscCall(PetscTableFind(baij->colmap,in[j]+1,&col)); 597 col = (col - 1)/bs; 598 #else 599 col = (baij->colmap[in[j]] - 1)/bs; 600 #endif 601 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 602 PetscCall(MatDisAssemble_MPISBAIJ(mat)); 603 col = in[j]; 604 } 605 } else col = in[j]; 606 PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j])); 607 } 608 } 609 } else { 610 PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 611 if (!baij->donotstash) { 612 if (roworiented) { 613 PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i)); 614 } else { 615 PetscCall(MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i)); 616 } 617 } 618 } 619 } 620 PetscFunctionReturn(0); 621 } 622 623 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 624 { 625 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 626 PetscInt bs = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend; 627 PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data; 628 629 PetscFunctionBegin; 630 for (i=0; i<m; i++) { 631 if (idxm[i] < 0) continue; /* negative row */ 632 PetscCheck(idxm[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,idxm[i],mat->rmap->N-1); 633 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 634 row = idxm[i] - bsrstart; 635 for (j=0; j<n; j++) { 636 if (idxn[j] < 0) continue; /* negative column */ 637 PetscCheck(idxn[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,idxn[j],mat->cmap->N-1); 638 if (idxn[j] >= bscstart && idxn[j] < bscend) { 639 col = idxn[j] - bscstart; 640 
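          /* the requested column lies in the diagonal block: read the value directly from the local SBAIJ part */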
PetscCall(MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j)); 641 } else { 642 if (!baij->colmap) { 643 PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 644 } 645 #if defined(PETSC_USE_CTABLE) 646 PetscCall(PetscTableFind(baij->colmap,idxn[j]/bs+1,&data)); 647 data--; 648 #else 649 data = baij->colmap[idxn[j]/bs]-1; 650 #endif 651 if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 652 else { 653 col = data + idxn[j]%bs; 654 PetscCall(MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j)); 655 } 656 } 657 } 658 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 659 } 660 PetscFunctionReturn(0); 661 } 662 663 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm) 664 { 665 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 666 PetscReal sum[2],*lnorm2; 667 668 PetscFunctionBegin; 669 if (baij->size == 1) { 670 PetscCall(MatNorm(baij->A,type,norm)); 671 } else { 672 if (type == NORM_FROBENIUS) { 673 PetscCall(PetscMalloc1(2,&lnorm2)); 674 PetscCall(MatNorm(baij->A,type,lnorm2)); 675 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++; /* squar power of norm(A) */ 676 PetscCall(MatNorm(baij->B,type,lnorm2)); 677 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--; /* squar power of norm(B) */ 678 PetscCall(MPIU_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat))); 679 *norm = PetscSqrtReal(sum[0] + 2*sum[1]); 680 PetscCall(PetscFree(lnorm2)); 681 } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */ 682 Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data; 683 Mat_SeqBAIJ *bmat=(Mat_SeqBAIJ*)baij->B->data; 684 PetscReal *rsum,*rsum2,vabs; 685 PetscInt *jj,*garray=baij->garray,rstart=baij->rstartbs,nz; 686 PetscInt brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs; 687 MatScalar *v; 688 689 PetscCall(PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2)); 690 PetscCall(PetscArrayzero(rsum,mat->cmap->N)); 691 /* Amat */ 692 v = amat->a; jj = amat->j; 693 for (brow=0; brow<mbs; brow++) { 694 grow = bs*(rstart + brow); 695 nz = amat->i[brow+1] - amat->i[brow]; 696 for (bcol=0; bcol<nz; bcol++) { 697 gcol = bs*(rstart + *jj); jj++; 698 for (col=0; col<bs; col++) { 699 for (row=0; row<bs; row++) { 700 vabs = PetscAbsScalar(*v); v++; 701 rsum[gcol+col] += vabs; 702 /* non-diagonal block */ 703 if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs; 704 } 705 } 706 } 707 PetscCall(PetscLogFlops(nz*bs*bs)); 708 } 709 /* Bmat */ 710 v = bmat->a; jj = bmat->j; 711 for (brow=0; brow<mbs; brow++) { 712 grow = bs*(rstart + brow); 713 nz = bmat->i[brow+1] - bmat->i[brow]; 714 for (bcol=0; bcol<nz; bcol++) { 715 gcol = bs*garray[*jj]; jj++; 716 for (col=0; col<bs; col++) { 717 for (row=0; row<bs; row++) { 718 vabs = PetscAbsScalar(*v); v++; 719 rsum[gcol+col] += vabs; 720 rsum[grow+row] += vabs; 721 } 722 } 723 } 724 PetscCall(PetscLogFlops(nz*bs*bs)); 725 } 726 PetscCall(MPIU_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat))); 727 *norm = 0.0; 728 for (col=0; col<mat->cmap->N; col++) { 729 if (rsum2[col] > *norm) *norm = rsum2[col]; 730 } 731 PetscCall(PetscFree2(rsum,rsum2)); 732 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet"); 733 } 734 PetscFunctionReturn(0); 735 } 736 737 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode) 738 { 739 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 740 PetscInt nstash,reallocs; 741 742 PetscFunctionBegin; 743 if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 744 745 
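  /* start sending the stashed point and block entries to the ranks that own them;
     the matching receives are processed in MatAssemblyEnd_MPISBAIJ() */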
PetscCall(MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range)); 746 PetscCall(MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs)); 747 PetscCall(MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs)); 748 PetscCall(PetscInfo(mat,"Stash has %" PetscInt_FMT " entries,uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs)); 749 PetscCall(MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs)); 750 PetscCall(PetscInfo(mat,"Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs)); 751 PetscFunctionReturn(0); 752 } 753 754 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode) 755 { 756 Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data; 757 Mat_SeqSBAIJ *a =(Mat_SeqSBAIJ*)baij->A->data; 758 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 759 PetscInt *row,*col; 760 PetscBool other_disassembled; 761 PetscMPIInt n; 762 PetscBool r1,r2,r3; 763 MatScalar *val; 764 765 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 766 PetscFunctionBegin; 767 if (!baij->donotstash && !mat->nooffprocentries) { 768 while (1) { 769 PetscCall(MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg)); 770 if (!flg) break; 771 772 for (i=0; i<n;) { 773 /* Now identify the consecutive vals belonging to the same row */ 774 for (j=i,rstart=row[j]; j<n; j++) { 775 if (row[j] != rstart) break; 776 } 777 if (j < n) ncols = j-i; 778 else ncols = n-i; 779 /* Now assemble all these values with a single function call */ 780 PetscCall(MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode)); 781 i = j; 782 } 783 } 784 PetscCall(MatStashScatterEnd_Private(&mat->stash)); 785 /* Now process the block-stash. Since the values are stashed column-oriented, 786 set the roworiented flag to column oriented, and after MatSetValues() 787 restore the original flags */ 788 r1 = baij->roworiented; 789 r2 = a->roworiented; 790 r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented; 791 792 baij->roworiented = PETSC_FALSE; 793 a->roworiented = PETSC_FALSE; 794 795 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworinted */ 796 while (1) { 797 PetscCall(MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg)); 798 if (!flg) break; 799 800 for (i=0; i<n;) { 801 /* Now identify the consecutive vals belonging to the same row */ 802 for (j=i,rstart=row[j]; j<n; j++) { 803 if (row[j] != rstart) break; 804 } 805 if (j < n) ncols = j-i; 806 else ncols = n-i; 807 PetscCall(MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode)); 808 i = j; 809 } 810 } 811 PetscCall(MatStashScatterEnd_Private(&mat->bstash)); 812 813 baij->roworiented = r1; 814 a->roworiented = r2; 815 816 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworinted */ 817 } 818 819 PetscCall(MatAssemblyBegin(baij->A,mode)); 820 PetscCall(MatAssemblyEnd(baij->A,mode)); 821 822 /* determine if any processor has disassembled, if so we must 823 also disassemble ourselves, in order that we may reassemble. 
*/ 824 /* 825 if nonzero structure of submatrix B cannot change then we know that 826 no processor disassembled thus we can skip this stuff 827 */ 828 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 829 PetscCall(MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat))); 830 if (mat->was_assembled && !other_disassembled) { 831 PetscCall(MatDisAssemble_MPISBAIJ(mat)); 832 } 833 } 834 835 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 836 PetscCall(MatSetUpMultiply_MPISBAIJ(mat)); /* setup Mvctx and sMvctx */ 837 } 838 PetscCall(MatAssemblyBegin(baij->B,mode)); 839 PetscCall(MatAssemblyEnd(baij->B,mode)); 840 841 PetscCall(PetscFree2(baij->rowvalues,baij->rowindices)); 842 843 baij->rowvalues = NULL; 844 845 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 846 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 847 PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate; 848 PetscCall(MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat))); 849 } 850 PetscFunctionReturn(0); 851 } 852 853 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 854 #include <petscdraw.h> 855 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 856 { 857 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 858 PetscInt bs = mat->rmap->bs; 859 PetscMPIInt rank = baij->rank; 860 PetscBool iascii,isdraw; 861 PetscViewer sviewer; 862 PetscViewerFormat format; 863 864 PetscFunctionBegin; 865 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii)); 866 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw)); 867 if (iascii) { 868 PetscCall(PetscViewerGetFormat(viewer,&format)); 869 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 870 MatInfo info; 871 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank)); 872 PetscCall(MatGetInfo(mat,MAT_LOCAL,&info)); 873 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 874 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(double)info.memory)); 875 PetscCall(MatGetInfo(baij->A,MAT_LOCAL,&info)); 876 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used)); 877 PetscCall(MatGetInfo(baij->B,MAT_LOCAL,&info)); 878 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used)); 879 PetscCall(PetscViewerFlush(viewer)); 880 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 881 PetscCall(PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n")); 882 PetscCall(VecScatterView(baij->Mvctx,viewer)); 883 PetscFunctionReturn(0); 884 } else if (format == PETSC_VIEWER_ASCII_INFO) { 885 PetscCall(PetscViewerASCIIPrintf(viewer," block size is %" PetscInt_FMT "\n",bs)); 886 PetscFunctionReturn(0); 887 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 888 PetscFunctionReturn(0); 889 } 890 } 891 892 if (isdraw) { 893 PetscDraw draw; 894 PetscBool isnull; 895 PetscCall(PetscViewerDrawGetDraw(viewer,0,&draw)); 896 
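    /* a null draw context means graphics are disabled, so there is nothing to render */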
PetscCall(PetscDrawIsNull(draw,&isnull)); 897 if (isnull) PetscFunctionReturn(0); 898 } 899 900 { 901 /* assemble the entire matrix onto first processor. */ 902 Mat A; 903 Mat_SeqSBAIJ *Aloc; 904 Mat_SeqBAIJ *Bloc; 905 PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 906 MatScalar *a; 907 const char *matname; 908 909 /* Should this be the same type as mat? */ 910 PetscCall(MatCreate(PetscObjectComm((PetscObject)mat),&A)); 911 if (rank == 0) { 912 PetscCall(MatSetSizes(A,M,N,M,N)); 913 } else { 914 PetscCall(MatSetSizes(A,0,0,M,N)); 915 } 916 PetscCall(MatSetType(A,MATMPISBAIJ)); 917 PetscCall(MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL)); 918 PetscCall(MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE)); 919 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)A)); 920 921 /* copy over the A part */ 922 Aloc = (Mat_SeqSBAIJ*)baij->A->data; 923 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 924 PetscCall(PetscMalloc1(bs,&rvals)); 925 926 for (i=0; i<mbs; i++) { 927 rvals[0] = bs*(baij->rstartbs + i); 928 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 929 for (j=ai[i]; j<ai[i+1]; j++) { 930 col = (baij->cstartbs+aj[j])*bs; 931 for (k=0; k<bs; k++) { 932 PetscCall(MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES)); 933 col++; 934 a += bs; 935 } 936 } 937 } 938 /* copy over the B part */ 939 Bloc = (Mat_SeqBAIJ*)baij->B->data; 940 ai = Bloc->i; aj = Bloc->j; a = Bloc->a; 941 for (i=0; i<mbs; i++) { 942 943 rvals[0] = bs*(baij->rstartbs + i); 944 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 945 for (j=ai[i]; j<ai[i+1]; j++) { 946 col = baij->garray[aj[j]]*bs; 947 for (k=0; k<bs; k++) { 948 PetscCall(MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES)); 949 col++; 950 a += bs; 951 } 952 } 953 } 954 PetscCall(PetscFree(rvals)); 955 PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY)); 956 PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY)); 957 /* 958 Everyone has to call to draw the matrix since the graphics waits are 959 synchronized across all processors that share the PetscDraw object 960 */ 961 PetscCall(PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer)); 962 PetscCall(PetscObjectGetName((PetscObject)mat,&matname)); 963 if (rank == 0) { 964 PetscCall(PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,matname)); 965 PetscCall(MatView_SeqSBAIJ(((Mat_MPISBAIJ*)(A->data))->A,sviewer)); 966 } 967 PetscCall(PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer)); 968 PetscCall(PetscViewerFlush(viewer)); 969 PetscCall(MatDestroy(&A)); 970 } 971 PetscFunctionReturn(0); 972 } 973 974 /* Used for both MPIBAIJ and MPISBAIJ matrices */ 975 #define MatView_MPISBAIJ_Binary MatView_MPIBAIJ_Binary 976 977 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer) 978 { 979 PetscBool iascii,isdraw,issocket,isbinary; 980 981 PetscFunctionBegin; 982 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii)); 983 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw)); 984 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket)); 985 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary)); 986 if (iascii || isdraw || issocket) { 987 PetscCall(MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer)); 988 } else if (isbinary) PetscCall(MatView_MPISBAIJ_Binary(mat,viewer)); 989 PetscFunctionReturn(0); 990 } 991 992 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat) 993 { 994 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 995 996 
PetscFunctionBegin; 997 #if defined(PETSC_USE_LOG) 998 PetscLogObjectState((PetscObject)mat,"Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT,mat->rmap->N,mat->cmap->N); 999 #endif 1000 PetscCall(MatStashDestroy_Private(&mat->stash)); 1001 PetscCall(MatStashDestroy_Private(&mat->bstash)); 1002 PetscCall(MatDestroy(&baij->A)); 1003 PetscCall(MatDestroy(&baij->B)); 1004 #if defined(PETSC_USE_CTABLE) 1005 PetscCall(PetscTableDestroy(&baij->colmap)); 1006 #else 1007 PetscCall(PetscFree(baij->colmap)); 1008 #endif 1009 PetscCall(PetscFree(baij->garray)); 1010 PetscCall(VecDestroy(&baij->lvec)); 1011 PetscCall(VecScatterDestroy(&baij->Mvctx)); 1012 PetscCall(VecDestroy(&baij->slvec0)); 1013 PetscCall(VecDestroy(&baij->slvec0b)); 1014 PetscCall(VecDestroy(&baij->slvec1)); 1015 PetscCall(VecDestroy(&baij->slvec1a)); 1016 PetscCall(VecDestroy(&baij->slvec1b)); 1017 PetscCall(VecScatterDestroy(&baij->sMvctx)); 1018 PetscCall(PetscFree2(baij->rowvalues,baij->rowindices)); 1019 PetscCall(PetscFree(baij->barray)); 1020 PetscCall(PetscFree(baij->hd)); 1021 PetscCall(VecDestroy(&baij->diag)); 1022 PetscCall(VecDestroy(&baij->bb1)); 1023 PetscCall(VecDestroy(&baij->xx1)); 1024 #if defined(PETSC_USE_REAL_MAT_SINGLE) 1025 PetscCall(PetscFree(baij->setvaluescopy)); 1026 #endif 1027 PetscCall(PetscFree(baij->in_loc)); 1028 PetscCall(PetscFree(baij->v_loc)); 1029 PetscCall(PetscFree(baij->rangebs)); 1030 PetscCall(PetscFree(mat->data)); 1031 1032 PetscCall(PetscObjectChangeTypeName((PetscObject)mat,NULL)); 1033 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL)); 1034 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL)); 1035 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL)); 1036 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocationCSR_C",NULL)); 1037 #if defined(PETSC_HAVE_ELEMENTAL) 1038 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_elemental_C",NULL)); 1039 #endif 1040 #if defined(PETSC_HAVE_SCALAPACK) 1041 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_scalapack_C",NULL)); 1042 #endif 1043 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpiaij_C",NULL)); 1044 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpibaij_C",NULL)); 1045 PetscFunctionReturn(0); 1046 } 1047 1048 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy) 1049 { 1050 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1051 PetscInt mbs=a->mbs,bs=A->rmap->bs; 1052 PetscScalar *from; 1053 const PetscScalar *x; 1054 1055 PetscFunctionBegin; 1056 /* diagonal part */ 1057 PetscCall((*a->A->ops->mult)(a->A,xx,a->slvec1a)); 1058 PetscCall(VecSet(a->slvec1b,0.0)); 1059 1060 /* subdiagonal part */ 1061 PetscCheck(a->B->ops->multhermitiantranspose,PetscObjectComm((PetscObject)a->B),PETSC_ERR_SUP,"Not for type %s",((PetscObject)a->B)->type_name); 1062 PetscCall((*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b)); 1063 1064 /* copy x into the vec slvec0 */ 1065 PetscCall(VecGetArray(a->slvec0,&from)); 1066 PetscCall(VecGetArrayRead(xx,&x)); 1067 1068 PetscCall(PetscArraycpy(from,x,bs*mbs)); 1069 PetscCall(VecRestoreArray(a->slvec0,&from)); 1070 PetscCall(VecRestoreArrayRead(xx,&x)); 1071 1072 PetscCall(VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD)); 1073 PetscCall(VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD)); 1074 /* supperdiagonal part */ 1075 
PetscCall((*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy)); 1076 PetscFunctionReturn(0); 1077 } 1078 1079 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy) 1080 { 1081 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1082 PetscInt mbs=a->mbs,bs=A->rmap->bs; 1083 PetscScalar *from; 1084 const PetscScalar *x; 1085 1086 PetscFunctionBegin; 1087 /* diagonal part */ 1088 PetscCall((*a->A->ops->mult)(a->A,xx,a->slvec1a)); 1089 PetscCall(VecSet(a->slvec1b,0.0)); 1090 1091 /* subdiagonal part */ 1092 PetscCall((*a->B->ops->multtranspose)(a->B,xx,a->slvec0b)); 1093 1094 /* copy x into the vec slvec0 */ 1095 PetscCall(VecGetArray(a->slvec0,&from)); 1096 PetscCall(VecGetArrayRead(xx,&x)); 1097 1098 PetscCall(PetscArraycpy(from,x,bs*mbs)); 1099 PetscCall(VecRestoreArray(a->slvec0,&from)); 1100 PetscCall(VecRestoreArrayRead(xx,&x)); 1101 1102 PetscCall(VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD)); 1103 PetscCall(VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD)); 1104 /* supperdiagonal part */ 1105 PetscCall((*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy)); 1106 PetscFunctionReturn(0); 1107 } 1108 1109 PetscErrorCode MatMultAdd_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy,Vec zz) 1110 { 1111 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1112 PetscInt mbs=a->mbs,bs=A->rmap->bs; 1113 PetscScalar *from,zero=0.0; 1114 const PetscScalar *x; 1115 1116 PetscFunctionBegin; 1117 /* diagonal part */ 1118 PetscCall((*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a)); 1119 PetscCall(VecSet(a->slvec1b,zero)); 1120 1121 /* subdiagonal part */ 1122 PetscCheck(a->B->ops->multhermitiantranspose,PetscObjectComm((PetscObject)a->B),PETSC_ERR_SUP,"Not for type %s",((PetscObject)a->B)->type_name); 1123 PetscCall((*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b)); 1124 1125 /* copy x into the vec slvec0 */ 1126 PetscCall(VecGetArray(a->slvec0,&from)); 1127 PetscCall(VecGetArrayRead(xx,&x)); 1128 PetscCall(PetscArraycpy(from,x,bs*mbs)); 1129 PetscCall(VecRestoreArray(a->slvec0,&from)); 1130 1131 PetscCall(VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD)); 1132 PetscCall(VecRestoreArrayRead(xx,&x)); 1133 PetscCall(VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD)); 1134 1135 /* supperdiagonal part */ 1136 PetscCall((*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz)); 1137 PetscFunctionReturn(0); 1138 } 1139 1140 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1141 { 1142 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1143 PetscInt mbs=a->mbs,bs=A->rmap->bs; 1144 PetscScalar *from,zero=0.0; 1145 const PetscScalar *x; 1146 1147 PetscFunctionBegin; 1148 /* diagonal part */ 1149 PetscCall((*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a)); 1150 PetscCall(VecSet(a->slvec1b,zero)); 1151 1152 /* subdiagonal part */ 1153 PetscCall((*a->B->ops->multtranspose)(a->B,xx,a->slvec0b)); 1154 1155 /* copy x into the vec slvec0 */ 1156 PetscCall(VecGetArray(a->slvec0,&from)); 1157 PetscCall(VecGetArrayRead(xx,&x)); 1158 PetscCall(PetscArraycpy(from,x,bs*mbs)); 1159 PetscCall(VecRestoreArray(a->slvec0,&from)); 1160 1161 PetscCall(VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD)); 1162 PetscCall(VecRestoreArrayRead(xx,&x)); 1163 PetscCall(VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD)); 1164 1165 /* supperdiagonal part */ 1166 PetscCall((*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz)); 1167 PetscFunctionReturn(0); 1168 } 1169 1170 /* 1171 This only works correctly for square matrices where 
the subblock A->A is the 1172 diagonal block 1173 */ 1174 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v) 1175 { 1176 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1177 1178 PetscFunctionBegin; 1179 /* PetscCheck(a->rmap->N == a->cmap->N,PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */ 1180 PetscCall(MatGetDiagonal(a->A,v)); 1181 PetscFunctionReturn(0); 1182 } 1183 1184 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa) 1185 { 1186 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1187 1188 PetscFunctionBegin; 1189 PetscCall(MatScale(a->A,aa)); 1190 PetscCall(MatScale(a->B,aa)); 1191 PetscFunctionReturn(0); 1192 } 1193 1194 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1195 { 1196 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 1197 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1198 PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1199 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend; 1200 PetscInt *cmap,*idx_p,cstart = mat->rstartbs; 1201 1202 PetscFunctionBegin; 1203 PetscCheck(!mat->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1204 mat->getrowactive = PETSC_TRUE; 1205 1206 if (!mat->rowvalues && (idx || v)) { 1207 /* 1208 allocate enough space to hold information from the longest row. 1209 */ 1210 Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data; 1211 Mat_SeqBAIJ *Ba = (Mat_SeqBAIJ*)mat->B->data; 1212 PetscInt max = 1,mbs = mat->mbs,tmp; 1213 for (i=0; i<mbs; i++) { 1214 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */ 1215 if (max < tmp) max = tmp; 1216 } 1217 PetscCall(PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices)); 1218 } 1219 1220 PetscCheck(row >= brstart && row < brend,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows"); 1221 lrow = row - brstart; /* local row index */ 1222 1223 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1224 if (!v) {pvA = NULL; pvB = NULL;} 1225 if (!idx) {pcA = NULL; if (!v) pcB = NULL;} 1226 PetscCall((*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA)); 1227 PetscCall((*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB)); 1228 nztot = nzA + nzB; 1229 1230 cmap = mat->garray; 1231 if (v || idx) { 1232 if (nztot) { 1233 /* Sort by increasing column numbers, assuming A and B already sorted */ 1234 PetscInt imark = -1; 1235 if (v) { 1236 *v = v_p = mat->rowvalues; 1237 for (i=0; i<nzB; i++) { 1238 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1239 else break; 1240 } 1241 imark = i; 1242 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1243 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1244 } 1245 if (idx) { 1246 *idx = idx_p = mat->rowindices; 1247 if (imark > -1) { 1248 for (i=0; i<imark; i++) { 1249 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1250 } 1251 } else { 1252 for (i=0; i<nzB; i++) { 1253 if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1254 else break; 1255 } 1256 imark = i; 1257 } 1258 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1259 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1260 } 1261 } else { 1262 if (idx) *idx = NULL; 1263 if (v) *v = NULL; 1264 } 1265 } 1266 *nz = nztot; 1267 PetscCall((*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA)); 1268 PetscCall((*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB)); 1269 PetscFunctionReturn(0); 1270 } 1271 1272 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt 
*nz,PetscInt **idx,PetscScalar **v) 1273 { 1274 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1275 1276 PetscFunctionBegin; 1277 PetscCheck(baij->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1278 baij->getrowactive = PETSC_FALSE; 1279 PetscFunctionReturn(0); 1280 } 1281 1282 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A) 1283 { 1284 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1285 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1286 1287 PetscFunctionBegin; 1288 aA->getrow_utriangular = PETSC_TRUE; 1289 PetscFunctionReturn(0); 1290 } 1291 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A) 1292 { 1293 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1294 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1295 1296 PetscFunctionBegin; 1297 aA->getrow_utriangular = PETSC_FALSE; 1298 PetscFunctionReturn(0); 1299 } 1300 1301 PetscErrorCode MatConjugate_MPISBAIJ(Mat mat) 1302 { 1303 PetscFunctionBegin; 1304 if (PetscDefined(USE_COMPLEX)) { 1305 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)mat->data; 1306 1307 PetscCall(MatConjugate(a->A)); 1308 PetscCall(MatConjugate(a->B)); 1309 } 1310 PetscFunctionReturn(0); 1311 } 1312 1313 PetscErrorCode MatRealPart_MPISBAIJ(Mat A) 1314 { 1315 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1316 1317 PetscFunctionBegin; 1318 PetscCall(MatRealPart(a->A)); 1319 PetscCall(MatRealPart(a->B)); 1320 PetscFunctionReturn(0); 1321 } 1322 1323 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A) 1324 { 1325 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1326 1327 PetscFunctionBegin; 1328 PetscCall(MatImaginaryPart(a->A)); 1329 PetscCall(MatImaginaryPart(a->B)); 1330 PetscFunctionReturn(0); 1331 } 1332 1333 /* Check if isrow is a subset of iscol_local, called by MatCreateSubMatrix_MPISBAIJ() 1334 Input: isrow - distributed(parallel), 1335 iscol_local - locally owned (seq) 1336 */ 1337 PetscErrorCode ISEqual_private(IS isrow,IS iscol_local,PetscBool *flg) 1338 { 1339 PetscInt sz1,sz2,*a1,*a2,i,j,k,nmatch; 1340 const PetscInt *ptr1,*ptr2; 1341 1342 PetscFunctionBegin; 1343 PetscCall(ISGetLocalSize(isrow,&sz1)); 1344 PetscCall(ISGetLocalSize(iscol_local,&sz2)); 1345 if (sz1 > sz2) { 1346 *flg = PETSC_FALSE; 1347 PetscFunctionReturn(0); 1348 } 1349 1350 PetscCall(ISGetIndices(isrow,&ptr1)); 1351 PetscCall(ISGetIndices(iscol_local,&ptr2)); 1352 1353 PetscCall(PetscMalloc1(sz1,&a1)); 1354 PetscCall(PetscMalloc1(sz2,&a2)); 1355 PetscCall(PetscArraycpy(a1,ptr1,sz1)); 1356 PetscCall(PetscArraycpy(a2,ptr2,sz2)); 1357 PetscCall(PetscSortInt(sz1,a1)); 1358 PetscCall(PetscSortInt(sz2,a2)); 1359 1360 nmatch=0; 1361 k = 0; 1362 for (i=0; i<sz1; i++) { 1363 for (j=k; j<sz2; j++) { 1364 if (a1[i] == a2[j]) { 1365 k = j; nmatch++; 1366 break; 1367 } 1368 } 1369 } 1370 PetscCall(ISRestoreIndices(isrow,&ptr1)); 1371 PetscCall(ISRestoreIndices(iscol_local,&ptr2)); 1372 PetscCall(PetscFree(a1)); 1373 PetscCall(PetscFree(a2)); 1374 if (nmatch < sz1) { 1375 *flg = PETSC_FALSE; 1376 } else { 1377 *flg = PETSC_TRUE; 1378 } 1379 PetscFunctionReturn(0); 1380 } 1381 1382 PetscErrorCode MatCreateSubMatrix_MPISBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 1383 { 1384 IS iscol_local; 1385 PetscInt csize; 1386 PetscBool isequal; 1387 1388 PetscFunctionBegin; 1389 PetscCall(ISGetLocalSize(iscol,&csize)); 1390 if (call == MAT_REUSE_MATRIX) { 1391 PetscCall(PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local)); 1392 PetscCheck(iscol_local,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 1393 } 
else { 1394 PetscBool issorted; 1395 1396 PetscCall(ISAllGather(iscol,&iscol_local)); 1397 PetscCall(ISEqual_private(isrow,iscol_local,&isequal)); 1398 PetscCall(ISSorted(iscol_local, &issorted)); 1399 PetscCheck(isequal && issorted,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isrow and be sorted"); 1400 } 1401 1402 /* now call MatCreateSubMatrix_MPIBAIJ() */ 1403 PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat)); 1404 if (call == MAT_INITIAL_MATRIX) { 1405 PetscCall(PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local)); 1406 PetscCall(ISDestroy(&iscol_local)); 1407 } 1408 PetscFunctionReturn(0); 1409 } 1410 1411 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A) 1412 { 1413 Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data; 1414 1415 PetscFunctionBegin; 1416 PetscCall(MatZeroEntries(l->A)); 1417 PetscCall(MatZeroEntries(l->B)); 1418 PetscFunctionReturn(0); 1419 } 1420 1421 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1422 { 1423 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data; 1424 Mat A = a->A,B = a->B; 1425 PetscLogDouble isend[5],irecv[5]; 1426 1427 PetscFunctionBegin; 1428 info->block_size = (PetscReal)matin->rmap->bs; 1429 1430 PetscCall(MatGetInfo(A,MAT_LOCAL,info)); 1431 1432 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1433 isend[3] = info->memory; isend[4] = info->mallocs; 1434 1435 PetscCall(MatGetInfo(B,MAT_LOCAL,info)); 1436 1437 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1438 isend[3] += info->memory; isend[4] += info->mallocs; 1439 if (flag == MAT_LOCAL) { 1440 info->nz_used = isend[0]; 1441 info->nz_allocated = isend[1]; 1442 info->nz_unneeded = isend[2]; 1443 info->memory = isend[3]; 1444 info->mallocs = isend[4]; 1445 } else if (flag == MAT_GLOBAL_MAX) { 1446 PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin))); 1447 1448 info->nz_used = irecv[0]; 1449 info->nz_allocated = irecv[1]; 1450 info->nz_unneeded = irecv[2]; 1451 info->memory = irecv[3]; 1452 info->mallocs = irecv[4]; 1453 } else if (flag == MAT_GLOBAL_SUM) { 1454 PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin))); 1455 1456 info->nz_used = irecv[0]; 1457 info->nz_allocated = irecv[1]; 1458 info->nz_unneeded = irecv[2]; 1459 info->memory = irecv[3]; 1460 info->mallocs = irecv[4]; 1461 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1462 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1463 info->fill_ratio_needed = 0; 1464 info->factor_mallocs = 0; 1465 PetscFunctionReturn(0); 1466 } 1467 1468 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg) 1469 { 1470 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1471 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1472 1473 PetscFunctionBegin; 1474 switch (op) { 1475 case MAT_NEW_NONZERO_LOCATIONS: 1476 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1477 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1478 case MAT_KEEP_NONZERO_PATTERN: 1479 case MAT_SUBMAT_SINGLEIS: 1480 case MAT_NEW_NONZERO_LOCATION_ERR: 1481 MatCheckPreallocated(A,1); 1482 PetscCall(MatSetOption(a->A,op,flg)); 1483 PetscCall(MatSetOption(a->B,op,flg)); 1484 break; 1485 case MAT_ROW_ORIENTED: 1486 MatCheckPreallocated(A,1); 1487 a->roworiented = flg; 1488 1489 PetscCall(MatSetOption(a->A,op,flg)); 1490 PetscCall(MatSetOption(a->B,op,flg)); 1491 break; 
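  /* the next two options are not used by MPISBAIJ; they are simply logged as ignored */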
1492 case MAT_FORCE_DIAGONAL_ENTRIES: 1493 case MAT_SORTED_FULL: 1494 PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op])); 1495 break; 1496 case MAT_IGNORE_OFF_PROC_ENTRIES: 1497 a->donotstash = flg; 1498 break; 1499 case MAT_USE_HASH_TABLE: 1500 a->ht_flag = flg; 1501 break; 1502 case MAT_HERMITIAN: 1503 MatCheckPreallocated(A,1); 1504 PetscCall(MatSetOption(a->A,op,flg)); 1505 #if defined(PETSC_USE_COMPLEX) 1506 if (flg) { /* need different mat-vec ops */ 1507 A->ops->mult = MatMult_MPISBAIJ_Hermitian; 1508 A->ops->multadd = MatMultAdd_MPISBAIJ_Hermitian; 1509 A->ops->multtranspose = NULL; 1510 A->ops->multtransposeadd = NULL; 1511 A->symmetric = PETSC_BOOL3_FALSE; 1512 } 1513 #endif 1514 break; 1515 case MAT_SPD: 1516 case MAT_SYMMETRIC: 1517 MatCheckPreallocated(A,1); 1518 PetscCall(MatSetOption(a->A,op,flg)); 1519 #if defined(PETSC_USE_COMPLEX) 1520 if (flg) { /* restore to use default mat-vec ops */ 1521 A->ops->mult = MatMult_MPISBAIJ; 1522 A->ops->multadd = MatMultAdd_MPISBAIJ; 1523 A->ops->multtranspose = MatMult_MPISBAIJ; 1524 A->ops->multtransposeadd = MatMultAdd_MPISBAIJ; 1525 } 1526 #endif 1527 break; 1528 case MAT_STRUCTURALLY_SYMMETRIC: 1529 MatCheckPreallocated(A,1); 1530 PetscCall(MatSetOption(a->A,op,flg)); 1531 break; 1532 case MAT_SYMMETRY_ETERNAL: 1533 case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 1534 PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric"); 1535 PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op])); 1536 break; 1537 case MAT_SPD_ETERNAL: 1538 break; 1539 case MAT_IGNORE_LOWER_TRIANGULAR: 1540 aA->ignore_ltriangular = flg; 1541 break; 1542 case MAT_ERROR_LOWER_TRIANGULAR: 1543 aA->ignore_ltriangular = flg; 1544 break; 1545 case MAT_GETROW_UPPERTRIANGULAR: 1546 aA->getrow_utriangular = flg; 1547 break; 1548 default: 1549 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1550 } 1551 PetscFunctionReturn(0); 1552 } 1553 1554 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B) 1555 { 1556 PetscFunctionBegin; 1557 if (reuse == MAT_INITIAL_MATRIX) { 1558 PetscCall(MatDuplicate(A,MAT_COPY_VALUES,B)); 1559 } else if (reuse == MAT_REUSE_MATRIX) { 1560 PetscCall(MatCopy(A,*B,SAME_NONZERO_PATTERN)); 1561 } 1562 PetscFunctionReturn(0); 1563 } 1564 1565 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr) 1566 { 1567 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1568 Mat a = baij->A, b=baij->B; 1569 PetscInt nv,m,n; 1570 PetscBool flg; 1571 1572 PetscFunctionBegin; 1573 if (ll != rr) { 1574 PetscCall(VecEqual(ll,rr,&flg)); 1575 PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same"); 1576 } 1577 if (!ll) PetscFunctionReturn(0); 1578 1579 PetscCall(MatGetLocalSize(mat,&m,&n)); 1580 PetscCheck(m == n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local size %" PetscInt_FMT " %" PetscInt_FMT " must be same",m,n); 1581 1582 PetscCall(VecGetLocalSize(rr,&nv)); 1583 PetscCheck(nv==n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size"); 1584 1585 PetscCall(VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1586 1587 /* left diagonalscale the off-diagonal part */ 1588 PetscCall((*b->ops->diagonalscale)(b,ll,NULL)); 1589 1590 /* scale the diagonal part */ 1591 PetscCall((*a->ops->diagonalscale)(a,ll,rr)); 1592 1593 /* right diagonalscale the off-diagonal part */ 1594 PetscCall(VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1595 
PetscCall((*b->ops->diagonalscale)(b,NULL,baij->lvec)); 1596 PetscFunctionReturn(0); 1597 } 1598 1599 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A) 1600 { 1601 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1602 1603 PetscFunctionBegin; 1604 PetscCall(MatSetUnfactored(a->A)); 1605 PetscFunctionReturn(0); 1606 } 1607 1608 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*); 1609 1610 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool *flag) 1611 { 1612 Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data; 1613 Mat a,b,c,d; 1614 PetscBool flg; 1615 1616 PetscFunctionBegin; 1617 a = matA->A; b = matA->B; 1618 c = matB->A; d = matB->B; 1619 1620 PetscCall(MatEqual(a,c,&flg)); 1621 if (flg) { 1622 PetscCall(MatEqual(b,d,&flg)); 1623 } 1624 PetscCall(MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A))); 1625 PetscFunctionReturn(0); 1626 } 1627 1628 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str) 1629 { 1630 PetscBool isbaij; 1631 1632 PetscFunctionBegin; 1633 PetscCall(PetscObjectTypeCompareAny((PetscObject)B,&isbaij,MATSEQSBAIJ,MATMPISBAIJ,"")); 1634 PetscCheck(isbaij,PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"Not for matrix type %s",((PetscObject)B)->type_name); 1635 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1636 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1637 PetscCall(MatGetRowUpperTriangular(A)); 1638 PetscCall(MatCopy_Basic(A,B,str)); 1639 PetscCall(MatRestoreRowUpperTriangular(A)); 1640 } else { 1641 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1642 Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)B->data; 1643 1644 PetscCall(MatCopy(a->A,b->A,str)); 1645 PetscCall(MatCopy(a->B,b->B,str)); 1646 } 1647 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 1648 PetscFunctionReturn(0); 1649 } 1650 1651 PetscErrorCode MatSetUp_MPISBAIJ(Mat A) 1652 { 1653 PetscFunctionBegin; 1654 PetscCall(MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL)); 1655 PetscFunctionReturn(0); 1656 } 1657 1658 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1659 { 1660 Mat_MPISBAIJ *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data; 1661 PetscBLASInt bnz,one=1; 1662 Mat_SeqSBAIJ *xa,*ya; 1663 Mat_SeqBAIJ *xb,*yb; 1664 1665 PetscFunctionBegin; 1666 if (str == SAME_NONZERO_PATTERN) { 1667 PetscScalar alpha = a; 1668 xa = (Mat_SeqSBAIJ*)xx->A->data; 1669 ya = (Mat_SeqSBAIJ*)yy->A->data; 1670 PetscCall(PetscBLASIntCast(xa->nz,&bnz)); 1671 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one)); 1672 xb = (Mat_SeqBAIJ*)xx->B->data; 1673 yb = (Mat_SeqBAIJ*)yy->B->data; 1674 PetscCall(PetscBLASIntCast(xb->nz,&bnz)); 1675 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one)); 1676 PetscCall(PetscObjectStateIncrease((PetscObject)Y)); 1677 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1678 PetscCall(MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE)); 1679 PetscCall(MatAXPY_Basic(Y,a,X,str)); 1680 PetscCall(MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE)); 1681 } else { 1682 Mat B; 1683 PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs; 1684 PetscCheck(bs == X->rmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size"); 1685 PetscCall(MatGetRowUpperTriangular(X)); 1686 PetscCall(MatGetRowUpperTriangular(Y)); 1687 PetscCall(PetscMalloc1(yy->A->rmap->N,&nnz_d)); 1688 PetscCall(PetscMalloc1(yy->B->rmap->N,&nnz_o)); 
1689 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y),&B)); 1690 PetscCall(PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name)); 1691 PetscCall(MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N)); 1692 PetscCall(MatSetBlockSizesFromMats(B,Y,Y)); 1693 PetscCall(MatSetType(B,MATMPISBAIJ)); 1694 PetscCall(MatAXPYGetPreallocation_SeqSBAIJ(yy->A,xx->A,nnz_d)); 1695 PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o)); 1696 PetscCall(MatMPISBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o)); 1697 PetscCall(MatAXPY_BasicWithPreallocation(B,Y,a,X,str)); 1698 PetscCall(MatHeaderMerge(Y,&B)); 1699 PetscCall(PetscFree(nnz_d)); 1700 PetscCall(PetscFree(nnz_o)); 1701 PetscCall(MatRestoreRowUpperTriangular(X)); 1702 PetscCall(MatRestoreRowUpperTriangular(Y)); 1703 } 1704 PetscFunctionReturn(0); 1705 } 1706 1707 PetscErrorCode MatCreateSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[]) 1708 { 1709 PetscInt i; 1710 PetscBool flg; 1711 1712 PetscFunctionBegin; 1713 PetscCall(MatCreateSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B)); /* B[] are sbaij matrices */ 1714 for (i=0; i<n; i++) { 1715 PetscCall(ISEqual(irow[i],icol[i],&flg)); 1716 if (!flg) { 1717 PetscCall(MatSeqSBAIJZeroOps_Private(*B[i])); 1718 } 1719 } 1720 PetscFunctionReturn(0); 1721 } 1722 1723 PetscErrorCode MatShift_MPISBAIJ(Mat Y,PetscScalar a) 1724 { 1725 Mat_MPISBAIJ *maij = (Mat_MPISBAIJ*)Y->data; 1726 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ*)maij->A->data; 1727 1728 PetscFunctionBegin; 1729 if (!Y->preallocated) { 1730 PetscCall(MatMPISBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL)); 1731 } else if (!aij->nz) { 1732 PetscInt nonew = aij->nonew; 1733 PetscCall(MatSeqSBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL)); 1734 aij->nonew = nonew; 1735 } 1736 PetscCall(MatShift_Basic(Y,a)); 1737 PetscFunctionReturn(0); 1738 } 1739 1740 PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A,PetscBool *missing,PetscInt *d) 1741 { 1742 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1743 1744 PetscFunctionBegin; 1745 PetscCheck(A->rmap->n == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 1746 PetscCall(MatMissingDiagonal(a->A,missing,d)); 1747 if (d) { 1748 PetscInt rstart; 1749 PetscCall(MatGetOwnershipRange(A,&rstart,NULL)); 1750 *d += rstart/A->rmap->bs; 1751 1752 } 1753 PetscFunctionReturn(0); 1754 } 1755 1756 PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a) 1757 { 1758 PetscFunctionBegin; 1759 *a = ((Mat_MPISBAIJ*)A->data)->A; 1760 PetscFunctionReturn(0); 1761 } 1762 1763 /* -------------------------------------------------------------------*/ 1764 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ, 1765 MatGetRow_MPISBAIJ, 1766 MatRestoreRow_MPISBAIJ, 1767 MatMult_MPISBAIJ, 1768 /* 4*/ MatMultAdd_MPISBAIJ, 1769 MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */ 1770 MatMultAdd_MPISBAIJ, 1771 NULL, 1772 NULL, 1773 NULL, 1774 /* 10*/ NULL, 1775 NULL, 1776 NULL, 1777 MatSOR_MPISBAIJ, 1778 MatTranspose_MPISBAIJ, 1779 /* 15*/ MatGetInfo_MPISBAIJ, 1780 MatEqual_MPISBAIJ, 1781 MatGetDiagonal_MPISBAIJ, 1782 MatDiagonalScale_MPISBAIJ, 1783 MatNorm_MPISBAIJ, 1784 /* 20*/ MatAssemblyBegin_MPISBAIJ, 1785 MatAssemblyEnd_MPISBAIJ, 1786 MatSetOption_MPISBAIJ, 1787 MatZeroEntries_MPISBAIJ, 1788 /* 24*/ NULL, 1789 NULL, 1790 NULL, 1791 NULL, 1792 NULL, 1793 /* 29*/ MatSetUp_MPISBAIJ, 1794 NULL, 1795 NULL, 1796 MatGetDiagonalBlock_MPISBAIJ, 1797 NULL, 1798 /* 34*/ MatDuplicate_MPISBAIJ, 1799 NULL, 1800 NULL, 1801 NULL, 1802 
NULL, 1803 /* 39*/ MatAXPY_MPISBAIJ, 1804 MatCreateSubMatrices_MPISBAIJ, 1805 MatIncreaseOverlap_MPISBAIJ, 1806 MatGetValues_MPISBAIJ, 1807 MatCopy_MPISBAIJ, 1808 /* 44*/ NULL, 1809 MatScale_MPISBAIJ, 1810 MatShift_MPISBAIJ, 1811 NULL, 1812 NULL, 1813 /* 49*/ NULL, 1814 NULL, 1815 NULL, 1816 NULL, 1817 NULL, 1818 /* 54*/ NULL, 1819 NULL, 1820 MatSetUnfactored_MPISBAIJ, 1821 NULL, 1822 MatSetValuesBlocked_MPISBAIJ, 1823 /* 59*/ MatCreateSubMatrix_MPISBAIJ, 1824 NULL, 1825 NULL, 1826 NULL, 1827 NULL, 1828 /* 64*/ NULL, 1829 NULL, 1830 NULL, 1831 NULL, 1832 NULL, 1833 /* 69*/ MatGetRowMaxAbs_MPISBAIJ, 1834 NULL, 1835 MatConvert_MPISBAIJ_Basic, 1836 NULL, 1837 NULL, 1838 /* 74*/ NULL, 1839 NULL, 1840 NULL, 1841 NULL, 1842 NULL, 1843 /* 79*/ NULL, 1844 NULL, 1845 NULL, 1846 NULL, 1847 MatLoad_MPISBAIJ, 1848 /* 84*/ NULL, 1849 NULL, 1850 NULL, 1851 NULL, 1852 NULL, 1853 /* 89*/ NULL, 1854 NULL, 1855 NULL, 1856 NULL, 1857 NULL, 1858 /* 94*/ NULL, 1859 NULL, 1860 NULL, 1861 NULL, 1862 NULL, 1863 /* 99*/ NULL, 1864 NULL, 1865 NULL, 1866 MatConjugate_MPISBAIJ, 1867 NULL, 1868 /*104*/ NULL, 1869 MatRealPart_MPISBAIJ, 1870 MatImaginaryPart_MPISBAIJ, 1871 MatGetRowUpperTriangular_MPISBAIJ, 1872 MatRestoreRowUpperTriangular_MPISBAIJ, 1873 /*109*/ NULL, 1874 NULL, 1875 NULL, 1876 NULL, 1877 MatMissingDiagonal_MPISBAIJ, 1878 /*114*/ NULL, 1879 NULL, 1880 NULL, 1881 NULL, 1882 NULL, 1883 /*119*/ NULL, 1884 NULL, 1885 NULL, 1886 NULL, 1887 NULL, 1888 /*124*/ NULL, 1889 NULL, 1890 NULL, 1891 NULL, 1892 NULL, 1893 /*129*/ NULL, 1894 NULL, 1895 NULL, 1896 NULL, 1897 NULL, 1898 /*134*/ NULL, 1899 NULL, 1900 NULL, 1901 NULL, 1902 NULL, 1903 /*139*/ MatSetBlockSizes_Default, 1904 NULL, 1905 NULL, 1906 NULL, 1907 NULL, 1908 /*144*/MatCreateMPIMatConcatenateSeqMat_MPISBAIJ, 1909 NULL, 1910 NULL, 1911 NULL, 1912 NULL, 1913 NULL 1914 }; 1915 1916 PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz) 1917 { 1918 Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)B->data; 1919 PetscInt i,mbs,Mbs; 1920 PetscMPIInt size; 1921 1922 PetscFunctionBegin; 1923 PetscCall(MatSetBlockSize(B,PetscAbs(bs))); 1924 PetscCall(PetscLayoutSetUp(B->rmap)); 1925 PetscCall(PetscLayoutSetUp(B->cmap)); 1926 PetscCall(PetscLayoutGetBlockSize(B->rmap,&bs)); 1927 PetscCheck(B->rmap->N <= B->cmap->N,PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"MPISBAIJ matrix cannot have more rows %" PetscInt_FMT " than columns %" PetscInt_FMT,B->rmap->N,B->cmap->N); 1928 PetscCheck(B->rmap->n <= B->cmap->n,PETSC_COMM_SELF,PETSC_ERR_SUP,"MPISBAIJ matrix cannot have more local rows %" PetscInt_FMT " than columns %" PetscInt_FMT,B->rmap->n,B->cmap->n); 1929 1930 mbs = B->rmap->n/bs; 1931 Mbs = B->rmap->N/bs; 1932 PetscCheck(mbs*bs == B->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT,B->rmap->N,bs); 1933 1934 B->rmap->bs = bs; 1935 b->bs2 = bs*bs; 1936 b->mbs = mbs; 1937 b->Mbs = Mbs; 1938 b->nbs = B->cmap->n/bs; 1939 b->Nbs = B->cmap->N/bs; 1940 1941 for (i=0; i<=b->size; i++) { 1942 b->rangebs[i] = B->rmap->range[i]/bs; 1943 } 1944 b->rstartbs = B->rmap->rstart/bs; 1945 b->rendbs = B->rmap->rend/bs; 1946 1947 b->cstartbs = B->cmap->rstart/bs; 1948 b->cendbs = B->cmap->rend/bs; 1949 1950 #if defined(PETSC_USE_CTABLE) 1951 PetscCall(PetscTableDestroy(&b->colmap)); 1952 #else 1953 PetscCall(PetscFree(b->colmap)); 1954 #endif 1955 PetscCall(PetscFree(b->garray)); 1956 PetscCall(VecDestroy(&b->lvec)); 1957 
PetscCall(VecScatterDestroy(&b->Mvctx)); 1958 PetscCall(VecDestroy(&b->slvec0)); 1959 PetscCall(VecDestroy(&b->slvec0b)); 1960 PetscCall(VecDestroy(&b->slvec1)); 1961 PetscCall(VecDestroy(&b->slvec1a)); 1962 PetscCall(VecDestroy(&b->slvec1b)); 1963 PetscCall(VecScatterDestroy(&b->sMvctx)); 1964 1965 /* Because the B will have been resized we simply destroy it and create a new one each time */ 1966 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&size)); 1967 PetscCall(MatDestroy(&b->B)); 1968 PetscCall(MatCreate(PETSC_COMM_SELF,&b->B)); 1969 PetscCall(MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0)); 1970 PetscCall(MatSetType(b->B,MATSEQBAIJ)); 1971 PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)b->B)); 1972 1973 if (!B->preallocated) { 1974 PetscCall(MatCreate(PETSC_COMM_SELF,&b->A)); 1975 PetscCall(MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n)); 1976 PetscCall(MatSetType(b->A,MATSEQSBAIJ)); 1977 PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)b->A)); 1978 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash)); 1979 } 1980 1981 PetscCall(MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz)); 1982 PetscCall(MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz)); 1983 1984 B->preallocated = PETSC_TRUE; 1985 B->was_assembled = PETSC_FALSE; 1986 B->assembled = PETSC_FALSE; 1987 PetscFunctionReturn(0); 1988 } 1989 1990 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 1991 { 1992 PetscInt m,rstart,cend; 1993 PetscInt i,j,d,nz,bd, nz_max=0,*d_nnz=NULL,*o_nnz=NULL; 1994 const PetscInt *JJ =NULL; 1995 PetscScalar *values=NULL; 1996 PetscBool roworiented = ((Mat_MPISBAIJ*)B->data)->roworiented; 1997 PetscBool nooffprocentries; 1998 1999 PetscFunctionBegin; 2000 PetscCheck(bs >= 1,PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %" PetscInt_FMT,bs); 2001 PetscCall(PetscLayoutSetBlockSize(B->rmap,bs)); 2002 PetscCall(PetscLayoutSetBlockSize(B->cmap,bs)); 2003 PetscCall(PetscLayoutSetUp(B->rmap)); 2004 PetscCall(PetscLayoutSetUp(B->cmap)); 2005 PetscCall(PetscLayoutGetBlockSize(B->rmap,&bs)); 2006 m = B->rmap->n/bs; 2007 rstart = B->rmap->rstart/bs; 2008 cend = B->cmap->rend/bs; 2009 2010 PetscCheck(!ii[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %" PetscInt_FMT,ii[0]); 2011 PetscCall(PetscMalloc2(m,&d_nnz,m,&o_nnz)); 2012 for (i=0; i<m; i++) { 2013 nz = ii[i+1] - ii[i]; 2014 PetscCheck(nz >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT,i,nz); 2015 /* count the ones on the diagonal and above, split into diagonal and off diagonal portions. 
*/ 2016 JJ = jj + ii[i]; 2017 bd = 0; 2018 for (j=0; j<nz; j++) { 2019 if (*JJ >= i + rstart) break; 2020 JJ++; 2021 bd++; 2022 } 2023 d = 0; 2024 for (; j<nz; j++) { 2025 if (*JJ++ >= cend) break; 2026 d++; 2027 } 2028 d_nnz[i] = d; 2029 o_nnz[i] = nz - d - bd; 2030 nz = nz - bd; 2031 nz_max = PetscMax(nz_max,nz); 2032 } 2033 PetscCall(MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz)); 2034 PetscCall(MatSetOption(B,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)); 2035 PetscCall(PetscFree2(d_nnz,o_nnz)); 2036 2037 values = (PetscScalar*)V; 2038 if (!values) { 2039 PetscCall(PetscCalloc1(bs*bs*nz_max,&values)); 2040 } 2041 for (i=0; i<m; i++) { 2042 PetscInt row = i + rstart; 2043 PetscInt ncols = ii[i+1] - ii[i]; 2044 const PetscInt *icols = jj + ii[i]; 2045 if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */ 2046 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2047 PetscCall(MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES)); 2048 } else { /* block ordering does not match so we can only insert one block at a time. */ 2049 PetscInt j; 2050 for (j=0; j<ncols; j++) { 2051 const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0); 2052 PetscCall(MatSetValuesBlocked_MPISBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES)); 2053 } 2054 } 2055 } 2056 2057 if (!V) PetscCall(PetscFree(values)); 2058 nooffprocentries = B->nooffprocentries; 2059 B->nooffprocentries = PETSC_TRUE; 2060 PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY)); 2061 PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY)); 2062 B->nooffprocentries = nooffprocentries; 2063 2064 PetscCall(MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE)); 2065 PetscFunctionReturn(0); 2066 } 2067 2068 /*MC 2069 MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices, 2070 based on block compressed sparse row format. Only the upper triangular portion of the "diagonal" portion of 2071 the matrix is stored. 2072 2073 For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you 2074 can call MatSetOption(Mat, MAT_HERMITIAN, PETSC_TRUE); 2075 2076 Options Database Keys: 2077 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions() 2078 2079 Notes: 2080 The number of rows in the matrix must be less than or equal to the number of columns. Similarly the number of rows in the 2081 diagonal portion of the matrix of each process has to be less than or equal to the number of columns.
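   Example (a minimal usage sketch; comm, m, M, bs, d_nz and o_nz below are application-dependent
   placeholders, not values prescribed by this type):
.vb
   Mat A;
   MatCreate(comm,&A);
   MatSetSizes(A,m,m,M,M);
   MatSetType(A,MATMPISBAIJ);
   MatSetFromOptions(A);
   MatMPISBAIJSetPreallocation(A,bs,d_nz,NULL,o_nz,NULL);
   /* set only entries on or above the (block) diagonal, then assemble */
   MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
   MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
.ve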
2082 2083 Level: beginner 2084 2085 .seealso: `MatCreateBAIJ()`, `MATSEQSBAIJ`, `MatType` 2086 M*/ 2087 2088 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B) 2089 { 2090 Mat_MPISBAIJ *b; 2091 PetscBool flg = PETSC_FALSE; 2092 2093 PetscFunctionBegin; 2094 PetscCall(PetscNewLog(B,&b)); 2095 B->data = (void*)b; 2096 PetscCall(PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps))); 2097 2098 B->ops->destroy = MatDestroy_MPISBAIJ; 2099 B->ops->view = MatView_MPISBAIJ; 2100 B->assembled = PETSC_FALSE; 2101 B->insertmode = NOT_SET_VALUES; 2102 2103 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank)); 2104 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size)); 2105 2106 /* build local table of row and column ownerships */ 2107 PetscCall(PetscMalloc1(b->size+2,&b->rangebs)); 2108 2109 /* build cache for off array entries formed */ 2110 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash)); 2111 2112 b->donotstash = PETSC_FALSE; 2113 b->colmap = NULL; 2114 b->garray = NULL; 2115 b->roworiented = PETSC_TRUE; 2116 2117 /* stuff used in block assembly */ 2118 b->barray = NULL; 2119 2120 /* stuff used for matrix vector multiply */ 2121 b->lvec = NULL; 2122 b->Mvctx = NULL; 2123 b->slvec0 = NULL; 2124 b->slvec0b = NULL; 2125 b->slvec1 = NULL; 2126 b->slvec1a = NULL; 2127 b->slvec1b = NULL; 2128 b->sMvctx = NULL; 2129 2130 /* stuff for MatGetRow() */ 2131 b->rowindices = NULL; 2132 b->rowvalues = NULL; 2133 b->getrowactive = PETSC_FALSE; 2134 2135 /* hash table stuff */ 2136 b->ht = NULL; 2137 b->hd = NULL; 2138 b->ht_size = 0; 2139 b->ht_flag = PETSC_FALSE; 2140 b->ht_fact = 0; 2141 b->ht_total_ct = 0; 2142 b->ht_insert_ct = 0; 2143 2144 /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */ 2145 b->ijonly = PETSC_FALSE; 2146 2147 b->in_loc = NULL; 2148 b->v_loc = NULL; 2149 b->n_loc = 0; 2150 2151 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ)); 2152 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ)); 2153 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ)); 2154 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ)); 2155 #if defined(PETSC_HAVE_ELEMENTAL) 2156 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_elemental_C",MatConvert_MPISBAIJ_Elemental)); 2157 #endif 2158 #if defined(PETSC_HAVE_SCALAPACK) 2159 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_scalapack_C",MatConvert_SBAIJ_ScaLAPACK)); 2160 #endif 2161 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpiaij_C",MatConvert_MPISBAIJ_Basic)); 2162 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpibaij_C",MatConvert_MPISBAIJ_Basic)); 2163 2164 B->symmetric = PETSC_BOOL3_TRUE; 2165 B->structurally_symmetric = PETSC_BOOL3_TRUE; 2166 B->symmetry_eternal = PETSC_TRUE; 2167 B->structural_symmetry_eternal = PETSC_TRUE ; 2168 #if defined(PETSC_USE_COMPLEX) 2169 B->hermitian = PETSC_BOOL3_FALSE; 2170 #else 2171 B->hermitian = PETSC_BOOL3_TRUE; 2172 #endif 2173 2174 PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ)); 2175 PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix 1","Mat"); 2176 PetscCall(PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing 
matrix","MatSetOption",flg,&flg,NULL)); 2177 if (flg) { 2178 PetscReal fact = 1.39; 2179 PetscCall(MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE)); 2180 PetscCall(PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL)); 2181 if (fact <= 1.0) fact = 1.39; 2182 PetscCall(MatMPIBAIJSetHashTableFactor(B,fact)); 2183 PetscCall(PetscInfo(B,"Hash table Factor used %5.2g\n",(double)fact)); 2184 } 2185 PetscOptionsEnd(); 2186 PetscFunctionReturn(0); 2187 } 2188 2189 /*MC 2190 MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices. 2191 2192 This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator, 2193 and MATMPISBAIJ otherwise. 2194 2195 Options Database Keys: 2196 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions() 2197 2198 Level: beginner 2199 2200 .seealso: `MatCreateSBAIJ`, `MATSEQSBAIJ`, `MATMPISBAIJ` 2201 M*/ 2202 2203 /*@C 2204 MatMPISBAIJSetPreallocation - For good matrix assembly performance 2205 the user should preallocate the matrix storage by setting the parameters 2206 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2207 performance can be increased by more than a factor of 50. 2208 2209 Collective on Mat 2210 2211 Input Parameters: 2212 + B - the matrix 2213 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 2214 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 2215 . d_nz - number of block nonzeros per block row in diagonal portion of local 2216 submatrix (same for all local rows) 2217 . d_nnz - array containing the number of block nonzeros in the various block rows 2218 in the upper triangular and diagonal part of the in diagonal portion of the local 2219 (possibly different for each block row) or NULL. If you plan to factor the matrix you must leave room 2220 for the diagonal entry and set a value even if it is zero. 2221 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2222 submatrix (same for all local rows). 2223 - o_nnz - array containing the number of nonzeros in the various block rows of the 2224 off-diagonal portion of the local submatrix that is right of the diagonal 2225 (possibly different for each block row) or NULL. 2226 2227 Options Database Keys: 2228 + -mat_no_unroll - uses code that does not unroll the loops in the 2229 block calculations (much slower) 2230 - -mat_block_size - size of the blocks to use 2231 2232 Notes: 2233 2234 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2235 than it must be used on all processors that share the object for that argument. 2236 2237 If the *_nnz parameter is given then the *_nz parameter is ignored 2238 2239 Storage Information: 2240 For a square global matrix we define each processor's diagonal portion 2241 to be its local rows and the corresponding columns (a square submatrix); 2242 each processor's off-diagonal portion encompasses the remainder of the 2243 local matrix (a rectangular submatrix). 2244 2245 The user can specify preallocated storage for the diagonal part of 2246 the local submatrix with either d_nz or d_nnz (not both). Set 2247 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 2248 memory allocation. 
Likewise, specify preallocated storage for the 2249 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2250 2251 You can call MatGetInfo() to get information on how effective the preallocation was; 2252 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 2253 You can also run with the option -info and look for messages with the string 2254 malloc in them to see if additional memory allocation was needed. 2255 2256 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2257 the figure below we depict these three local rows and all columns (0-11). 2258 2259 .vb 2260 0 1 2 3 4 5 6 7 8 9 10 11 2261 -------------------------- 2262 row 3 |. . . d d d o o o o o o 2263 row 4 |. . . d d d o o o o o o 2264 row 5 |. . . d d d o o o o o o 2265 -------------------------- 2266 .ve 2267 2268 Thus, any entries in the d locations are stored in the d (diagonal) 2269 submatrix, and any entries in the o locations are stored in the 2270 o (off-diagonal) submatrix. Note that the d matrix is stored in 2271 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 2272 2273 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 2274 plus the diagonal part of the d matrix, 2275 and o_nz should indicate the number of block nonzeros per row in the o matrix 2276 2277 In general, for PDE problems in which most nonzeros are near the diagonal, 2278 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2279 or you will get TERRIBLE performance; see the users' manual chapter on 2280 matrices. 2281 2282 Level: intermediate 2283 2284 .seealso: `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `PetscSplitOwnership()` 2285 @*/ 2286 PetscErrorCode MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2287 { 2288 PetscFunctionBegin; 2289 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2290 PetscValidType(B,1); 2291 PetscValidLogicalCollectiveInt(B,bs,2); 2292 PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz)); 2293 PetscFunctionReturn(0); 2294 } 2295 2296 /*@C 2297 MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format 2298 (block compressed row). For good matrix assembly performance 2299 the user should preallocate the matrix storage by setting the parameters 2300 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2301 performance can be increased by more than a factor of 50. 2302 2303 Collective 2304 2305 Input Parameters: 2306 + comm - MPI communicator 2307 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 2308 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 2309 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2310 This value should be the same as the local size used in creating the 2311 y vector for the matrix-vector product y = Ax. 2312 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 2313 This value should be the same as the local size used in creating the 2314 x vector for the matrix-vector product y = Ax. 2315 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2316 . 
N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2317 . d_nz - number of block nonzeros per block row in diagonal portion of local 2318 submatrix (same for all local rows) 2319 . d_nnz - array containing the number of block nonzeros in the various block rows 2320 in the upper triangular portion of the diagonal portion of the local 2321 submatrix (possibly different for each block row) or NULL. 2322 If you plan to factor the matrix you must leave room for the diagonal entry and 2323 set its value even if it is zero. 2324 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2325 submatrix (same for all local rows). 2326 - o_nnz - array containing the number of nonzeros in the various block rows of the 2327 off-diagonal portion of the local submatrix (possibly different for 2328 each block row) or NULL. 2329 2330 Output Parameter: 2331 . A - the matrix 2332 2333 Options Database Keys: 2334 + -mat_no_unroll - uses code that does not unroll the loops in the 2335 block calculations (much slower) 2336 . -mat_block_size - size of the blocks to use 2337 - -mat_mpi - use the parallel matrix data structures even on one processor 2338 (defaults to using SeqSBAIJ format on one processor) 2339 2340 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 2341 MatXXXXSetPreallocation() paradigm instead of this routine directly. 2342 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 2343 2344 Notes: 2345 The number of rows and columns must be divisible by blocksize. 2346 This matrix type does not support complex Hermitian operations. 2347 2348 The user MUST specify either the local or global matrix dimensions 2349 (possibly both). 2350 2351 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2352 then it must be used on all processors that share the object for that argument. 2353 2354 If the *_nnz parameter is given then the *_nz parameter is ignored. 2355 2356 Storage Information: 2357 For a square global matrix we define each processor's diagonal portion 2358 to be its local rows and the corresponding columns (a square submatrix); 2359 each processor's off-diagonal portion encompasses the remainder of the 2360 local matrix (a rectangular submatrix). 2361 2362 The user can specify preallocated storage for the diagonal part of 2363 the local submatrix with either d_nz or d_nnz (not both). Set 2364 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 2365 memory allocation. Likewise, specify preallocated storage for the 2366 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2367 2368 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2369 the figure below we depict these three local rows and all columns (0-11). 2370 2371 .vb 2372 0 1 2 3 4 5 6 7 8 9 10 11 2373 -------------------------- 2374 row 3 |. . . d d d o o o o o o 2375 row 4 |. . . d d d o o o o o o 2376 row 5 |. . . d d d o o o o o o 2377 -------------------------- 2378 .ve 2379 2380 Thus, any entries in the d locations are stored in the d (diagonal) 2381 submatrix, and any entries in the o locations are stored in the 2382 o (off-diagonal) submatrix. Note that the d matrix is stored in 2383 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2384 2385 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 2386 plus the diagonal part of the d matrix, 2387 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2388 In general, for PDE problems in which most nonzeros are near the diagonal, 2389 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2390 or you will get TERRIBLE performance; see the users' manual chapter on 2391 matrices. 2392 2393 Level: intermediate 2394 2395 .seealso: `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()` 2396 @*/ 2397 2398 PetscErrorCode MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2399 { 2400 PetscMPIInt size; 2401 2402 PetscFunctionBegin; 2403 PetscCall(MatCreate(comm,A)); 2404 PetscCall(MatSetSizes(*A,m,n,M,N)); 2405 PetscCallMPI(MPI_Comm_size(comm,&size)); 2406 if (size > 1) { 2407 PetscCall(MatSetType(*A,MATMPISBAIJ)); 2408 PetscCall(MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz)); 2409 } else { 2410 PetscCall(MatSetType(*A,MATSEQSBAIJ)); 2411 PetscCall(MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz)); 2412 } 2413 PetscFunctionReturn(0); 2414 } 2415 2416 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2417 { 2418 Mat mat; 2419 Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data; 2420 PetscInt len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs; 2421 PetscScalar *array; 2422 2423 PetscFunctionBegin; 2424 *newmat = NULL; 2425 2426 PetscCall(MatCreate(PetscObjectComm((PetscObject)matin),&mat)); 2427 PetscCall(MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N)); 2428 PetscCall(MatSetType(mat,((PetscObject)matin)->type_name)); 2429 PetscCall(PetscLayoutReference(matin->rmap,&mat->rmap)); 2430 PetscCall(PetscLayoutReference(matin->cmap,&mat->cmap)); 2431 2432 mat->factortype = matin->factortype; 2433 mat->preallocated = PETSC_TRUE; 2434 mat->assembled = PETSC_TRUE; 2435 mat->insertmode = NOT_SET_VALUES; 2436 2437 a = (Mat_MPISBAIJ*)mat->data; 2438 a->bs2 = oldmat->bs2; 2439 a->mbs = oldmat->mbs; 2440 a->nbs = oldmat->nbs; 2441 a->Mbs = oldmat->Mbs; 2442 a->Nbs = oldmat->Nbs; 2443 2444 a->size = oldmat->size; 2445 a->rank = oldmat->rank; 2446 a->donotstash = oldmat->donotstash; 2447 a->roworiented = oldmat->roworiented; 2448 a->rowindices = NULL; 2449 a->rowvalues = NULL; 2450 a->getrowactive = PETSC_FALSE; 2451 a->barray = NULL; 2452 a->rstartbs = oldmat->rstartbs; 2453 a->rendbs = oldmat->rendbs; 2454 a->cstartbs = oldmat->cstartbs; 2455 a->cendbs = oldmat->cendbs; 2456 2457 /* hash table stuff */ 2458 a->ht = NULL; 2459 a->hd = NULL; 2460 a->ht_size = 0; 2461 a->ht_flag = oldmat->ht_flag; 2462 a->ht_fact = oldmat->ht_fact; 2463 a->ht_total_ct = 0; 2464 a->ht_insert_ct = 0; 2465 2466 PetscCall(PetscArraycpy(a->rangebs,oldmat->rangebs,a->size+2)); 2467 if (oldmat->colmap) { 2468 #if defined(PETSC_USE_CTABLE) 2469 PetscCall(PetscTableCreateCopy(oldmat->colmap,&a->colmap)); 2470 #else 2471 PetscCall(PetscMalloc1(a->Nbs,&a->colmap)); 2472 PetscCall(PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt))); 2473 PetscCall(PetscArraycpy(a->colmap,oldmat->colmap,a->Nbs)); 2474 #endif 2475 } else a->colmap = NULL; 2476 2477 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2478 PetscCall(PetscMalloc1(len,&a->garray)); 2479 
PetscCall(PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt))); 2480 PetscCall(PetscArraycpy(a->garray,oldmat->garray,len)); 2481 } else a->garray = NULL; 2482 2483 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash)); 2484 PetscCall(VecDuplicate(oldmat->lvec,&a->lvec)); 2485 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec)); 2486 PetscCall(VecScatterCopy(oldmat->Mvctx,&a->Mvctx)); 2487 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx)); 2488 2489 PetscCall(VecDuplicate(oldmat->slvec0,&a->slvec0)); 2490 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0)); 2491 PetscCall(VecDuplicate(oldmat->slvec1,&a->slvec1)); 2492 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1)); 2493 2494 PetscCall(VecGetLocalSize(a->slvec1,&nt)); 2495 PetscCall(VecGetArray(a->slvec1,&array)); 2496 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a)); 2497 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b)); 2498 PetscCall(VecRestoreArray(a->slvec1,&array)); 2499 PetscCall(VecGetArray(a->slvec0,&array)); 2500 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b)); 2501 PetscCall(VecRestoreArray(a->slvec0,&array)); 2502 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0)); 2503 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1)); 2504 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b)); 2505 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a)); 2506 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b)); 2507 2508 /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */ 2509 PetscCall(PetscObjectReference((PetscObject)oldmat->sMvctx)); 2510 a->sMvctx = oldmat->sMvctx; 2511 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx)); 2512 2513 PetscCall(MatDuplicate(oldmat->A,cpvalues,&a->A)); 2514 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A)); 2515 PetscCall(MatDuplicate(oldmat->B,cpvalues,&a->B)); 2516 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B)); 2517 PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist)); 2518 *newmat = mat; 2519 PetscFunctionReturn(0); 2520 } 2521 2522 /* Used for both MPIBAIJ and MPISBAIJ matrices */ 2523 #define MatLoad_MPISBAIJ_Binary MatLoad_MPIBAIJ_Binary 2524 2525 PetscErrorCode MatLoad_MPISBAIJ(Mat mat,PetscViewer viewer) 2526 { 2527 PetscBool isbinary; 2528 2529 PetscFunctionBegin; 2530 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary)); 2531 PetscCheck(isbinary,PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)mat)->type_name); 2532 PetscCall(MatLoad_MPISBAIJ_Binary(mat,viewer)); 2533 PetscFunctionReturn(0); 2534 } 2535 2536 /*XXXXX@ 2537 MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2538 2539 Input Parameters: 2540 . mat - the matrix 2541 . 
fact - factor 2542 2543 Not Collective on Mat, each process can have a different hash factor 2544 2545 Level: advanced 2546 2547 Notes: 2548 This can also be set by the command line option: -mat_use_hash_table fact 2549 2550 .seealso: `MatSetOption()` 2551 @XXXXX*/ 2552 2553 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[]) 2554 { 2555 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 2556 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(a->B)->data; 2557 PetscReal atmp; 2558 PetscReal *work,*svalues,*rvalues; 2559 PetscInt i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol; 2560 PetscMPIInt rank,size; 2561 PetscInt *rowners_bs,dest,count,source; 2562 PetscScalar *va; 2563 MatScalar *ba; 2564 MPI_Status stat; 2565 2566 PetscFunctionBegin; 2567 PetscCheck(!idx,PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov"); 2568 PetscCall(MatGetRowMaxAbs(a->A,v,NULL)); 2569 PetscCall(VecGetArray(v,&va)); 2570 2571 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size)); 2572 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank)); 2573 2574 bs = A->rmap->bs; 2575 mbs = a->mbs; 2576 Mbs = a->Mbs; 2577 ba = b->a; 2578 bi = b->i; 2579 bj = b->j; 2580 2581 /* find ownerships */ 2582 rowners_bs = A->rmap->range; 2583 2584 /* each proc creates an array to be distributed */ 2585 PetscCall(PetscCalloc1(bs*Mbs,&work)); 2586 2587 /* row_max for B */ 2588 if (rank != size-1) { 2589 for (i=0; i<mbs; i++) { 2590 ncols = bi[1] - bi[0]; bi++; 2591 brow = bs*i; 2592 for (j=0; j<ncols; j++) { 2593 bcol = bs*(*bj); 2594 for (kcol=0; kcol<bs; kcol++) { 2595 col = bcol + kcol; /* local col index */ 2596 col += rowners_bs[rank+1]; /* global col index */ 2597 for (krow=0; krow<bs; krow++) { 2598 atmp = PetscAbsScalar(*ba); ba++; 2599 row = brow + krow; /* local row index */ 2600 if (PetscRealPart(va[row]) < atmp) va[row] = atmp; 2601 if (work[col] < atmp) work[col] = atmp; 2602 } 2603 } 2604 bj++; 2605 } 2606 } 2607 2608 /* send values to its owners */ 2609 for (dest=rank+1; dest<size; dest++) { 2610 svalues = work + rowners_bs[dest]; 2611 count = rowners_bs[dest+1]-rowners_bs[dest]; 2612 PetscCallMPI(MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A))); 2613 } 2614 } 2615 2616 /* receive values */ 2617 if (rank) { 2618 rvalues = work; 2619 count = rowners_bs[rank+1]-rowners_bs[rank]; 2620 for (source=0; source<rank; source++) { 2621 PetscCallMPI(MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat)); 2622 /* process values */ 2623 for (i=0; i<count; i++) { 2624 if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i]; 2625 } 2626 } 2627 } 2628 2629 PetscCall(VecRestoreArray(v,&va)); 2630 PetscCall(PetscFree(work)); 2631 PetscFunctionReturn(0); 2632 } 2633 2634 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2635 { 2636 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2637 PetscInt mbs=mat->mbs,bs=matin->rmap->bs; 2638 PetscScalar *x,*ptr,*from; 2639 Vec bb1; 2640 const PetscScalar *b; 2641 2642 PetscFunctionBegin; 2643 PetscCheck(its > 0 && lits > 0,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive",its,lits); 2644 PetscCheck(bs <= 1,PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2645 2646 if (flag == SOR_APPLY_UPPER) { 2647 PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx)); 2648 
PetscFunctionReturn(0); 2649 } 2650 2651 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2652 if (flag & SOR_ZERO_INITIAL_GUESS) { 2653 PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx)); 2654 its--; 2655 } 2656 2657 PetscCall(VecDuplicate(bb,&bb1)); 2658 while (its--) { 2659 2660 /* lower triangular part: slvec0b = - B^T*xx */ 2661 PetscCall((*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b)); 2662 2663 /* copy xx into slvec0a */ 2664 PetscCall(VecGetArray(mat->slvec0,&ptr)); 2665 PetscCall(VecGetArray(xx,&x)); 2666 PetscCall(PetscArraycpy(ptr,x,bs*mbs)); 2667 PetscCall(VecRestoreArray(mat->slvec0,&ptr)); 2668 2669 PetscCall(VecScale(mat->slvec0,-1.0)); 2670 2671 /* copy bb into slvec1a */ 2672 PetscCall(VecGetArray(mat->slvec1,&ptr)); 2673 PetscCall(VecGetArrayRead(bb,&b)); 2674 PetscCall(PetscArraycpy(ptr,b,bs*mbs)); 2675 PetscCall(VecRestoreArray(mat->slvec1,&ptr)); 2676 2677 /* set slvec1b = 0 */ 2678 PetscCall(VecSet(mat->slvec1b,0.0)); 2679 2680 PetscCall(VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD)); 2681 PetscCall(VecRestoreArray(xx,&x)); 2682 PetscCall(VecRestoreArrayRead(bb,&b)); 2683 PetscCall(VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD)); 2684 2685 /* upper triangular part: bb1 = bb1 - B*x */ 2686 PetscCall((*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1)); 2687 2688 /* local diagonal sweep */ 2689 PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx)); 2690 } 2691 PetscCall(VecDestroy(&bb1)); 2692 } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 2693 PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx)); 2694 } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 2695 PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx)); 2696 } else if (flag & SOR_EISENSTAT) { 2697 Vec xx1; 2698 PetscBool hasop; 2699 const PetscScalar *diag; 2700 PetscScalar *sl,scale = (omega - 2.0)/omega; 2701 PetscInt i,n; 2702 2703 if (!mat->xx1) { 2704 PetscCall(VecDuplicate(bb,&mat->xx1)); 2705 PetscCall(VecDuplicate(bb,&mat->bb1)); 2706 } 2707 xx1 = mat->xx1; 2708 bb1 = mat->bb1; 2709 2710 PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx)); 2711 2712 if (!mat->diag) { 2713 /* this is wrong for same matrix with new nonzero values */ 2714 PetscCall(MatCreateVecs(matin,&mat->diag,NULL)); 2715 PetscCall(MatGetDiagonal(matin,mat->diag)); 2716 } 2717 PetscCall(MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop)); 2718 2719 if (hasop) { 2720 PetscCall(MatMultDiagonalBlock(matin,xx,bb1)); 2721 PetscCall(VecAYPX(mat->slvec1a,scale,bb)); 2722 } else { 2723 /* 2724 These two lines are replaced by code that may be a bit faster for a good compiler 2725 PetscCall(VecPointwiseMult(mat->slvec1a,mat->diag,xx)); 2726 PetscCall(VecAYPX(mat->slvec1a,scale,bb)); 2727 */ 2728 PetscCall(VecGetArray(mat->slvec1a,&sl)); 2729 PetscCall(VecGetArrayRead(mat->diag,&diag)); 2730 PetscCall(VecGetArrayRead(bb,&b)); 2731 PetscCall(VecGetArray(xx,&x)); 2732 PetscCall(VecGetLocalSize(xx,&n)); 2733 if (omega == 1.0) { 2734 for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i]; 2735 PetscCall(PetscLogFlops(2.0*n)); 2736 } else { 2737 for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i]; 2738 PetscCall(PetscLogFlops(3.0*n)); 2739 } 2740 PetscCall(VecRestoreArray(mat->slvec1a,&sl)); 
2741 PetscCall(VecRestoreArrayRead(mat->diag,&diag)); 2742 PetscCall(VecRestoreArrayRead(bb,&b)); 2743 PetscCall(VecRestoreArray(xx,&x)); 2744 } 2745 2746 /* multiply off-diagonal portion of matrix */ 2747 PetscCall(VecSet(mat->slvec1b,0.0)); 2748 PetscCall((*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b)); 2749 PetscCall(VecGetArray(mat->slvec0,&from)); 2750 PetscCall(VecGetArray(xx,&x)); 2751 PetscCall(PetscArraycpy(from,x,bs*mbs)); 2752 PetscCall(VecRestoreArray(mat->slvec0,&from)); 2753 PetscCall(VecRestoreArray(xx,&x)); 2754 PetscCall(VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD)); 2755 PetscCall(VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD)); 2756 PetscCall((*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a)); 2757 2758 /* local sweep */ 2759 PetscCall((*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1)); 2760 PetscCall(VecAXPY(xx,1.0,xx1)); 2761 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2762 PetscFunctionReturn(0); 2763 } 2764 2765 /*@ 2766 MatCreateMPISBAIJWithArrays - creates an MPI SBAIJ matrix using arrays that contain the local rows in standard 2767 CSR format. 2768 2769 Collective 2770 2771 Input Parameters: 2772 + comm - MPI communicator 2773 . bs - the block size, only a block size of 1 is supported 2774 . m - number of local rows (Cannot be PETSC_DECIDE) 2775 . n - This value should be the same as the local size used in creating the 2776 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 2777 calculated if N is given) For square matrices n is almost always m. 2778 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2779 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2780 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that block row of the matrix 2781 . j - column indices 2782 - a - matrix values 2783 2784 Output Parameter: 2785 . mat - the matrix 2786 2787 Level: intermediate 2788 2789 Notes: 2790 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 2791 thus you CANNOT change the matrix entries by changing the values of a[] after you have 2792 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 2793 2794 The i and j indices are 0 based, and the i indices are offsets into the local j array.
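   Example (a minimal usage sketch with bs = 1; m and the CSR arrays i, j, and a are application-dependent
   placeholders describing the locally owned rows, with only entries on or above the global diagonal stored):
.vb
   Mat A;
   MatCreateMPISBAIJWithArrays(PETSC_COMM_WORLD,1,m,m,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,&A);
.ve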
2795 2796 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 2797 `MPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()` 2798 @*/ 2799 PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 2800 { 2801 PetscFunctionBegin; 2802 PetscCheck(!i[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 2803 PetscCheck(m >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 2804 PetscCall(MatCreate(comm,mat)); 2805 PetscCall(MatSetSizes(*mat,m,n,M,N)); 2806 PetscCall(MatSetType(*mat,MATMPISBAIJ)); 2807 PetscCall(MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a)); 2808 PetscFunctionReturn(0); 2809 } 2810 2811 /*@C 2812 MatMPISBAIJSetPreallocationCSR - Creates a sparse parallel matrix in SBAIJ format using the given nonzero structure and (optional) numerical values 2813 2814 Collective 2815 2816 Input Parameters: 2817 + B - the matrix 2818 . bs - the block size 2819 . i - the indices into j for the start of each local row (starts with zero) 2820 . j - the column indices for each local row (starts with zero); these must be sorted within each row 2821 - v - optional values in the matrix 2822 2823 Level: advanced 2824 2825 Notes: 2826 Though this routine has Preallocation() in the name, it also sets the exact nonzero locations of the matrix entries 2827 and usually the numerical values as well. 2828 2829 Any entries below the diagonal are ignored. 2830 2831 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MPIAIJ` 2832 @*/ 2833 PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2834 { 2835 PetscFunctionBegin; 2836 PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v)); 2837 PetscFunctionReturn(0); 2838 } 2839 2840 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 2841 { 2842 PetscInt m,N,i,rstart,nnz,Ii,bs,cbs; 2843 PetscInt *indx; 2844 PetscScalar *values; 2845 2846 PetscFunctionBegin; 2847 PetscCall(MatGetSize(inmat,&m,&N)); 2848 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 2849 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inmat->data; 2850 PetscInt *dnz,*onz,mbs,Nbs,nbs; 2851 PetscInt *bindx,rmax=a->rmax,j; 2852 PetscMPIInt rank,size; 2853 2854 PetscCall(MatGetBlockSizes(inmat,&bs,&cbs)); 2855 mbs = m/bs; Nbs = N/cbs; 2856 if (n == PETSC_DECIDE) { 2857 PetscCall(PetscSplitOwnershipBlock(comm,cbs,&n,&N)); 2858 } 2859 nbs = n/cbs; 2860 2861 PetscCall(PetscMalloc1(rmax,&bindx)); 2862 MatPreallocateBegin(comm,mbs,nbs,dnz,onz); /* inline function, output __end and __rstart are used below */ 2863 2864 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 2865 PetscCallMPI(MPI_Comm_size(comm,&size)); 2866 if (rank == size-1) { 2867 /* Check sum(nbs) = Nbs */ 2868 PetscCheck(__end == Nbs,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT,__end,Nbs); 2869 } 2870 2871 rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */ 2872 PetscCall(MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE)); 2873 for (i=0; i<mbs; i++) { 2874
PetscCall(MatGetRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL)); /* non-blocked nnz and indx */ 2875 nnz = nnz/bs; 2876 for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs; 2877 PetscCall(MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz)); 2878 PetscCall(MatRestoreRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL)); 2879 } 2880 PetscCall(MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE)); 2881 PetscCall(PetscFree(bindx)); 2882 2883 PetscCall(MatCreate(comm,outmat)); 2884 PetscCall(MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE)); 2885 PetscCall(MatSetBlockSizes(*outmat,bs,cbs)); 2886 PetscCall(MatSetType(*outmat,MATSBAIJ)); 2887 PetscCall(MatSeqSBAIJSetPreallocation(*outmat,bs,0,dnz)); 2888 PetscCall(MatMPISBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz)); 2889 MatPreallocateEnd(dnz,onz); 2890 } 2891 2892 /* numeric phase */ 2893 PetscCall(MatGetBlockSizes(inmat,&bs,&cbs)); 2894 PetscCall(MatGetOwnershipRange(*outmat,&rstart,NULL)); 2895 2896 PetscCall(MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE)); 2897 for (i=0; i<m; i++) { 2898 PetscCall(MatGetRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values)); 2899 Ii = i + rstart; 2900 PetscCall(MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES)); 2901 PetscCall(MatRestoreRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values)); 2902 } 2903 PetscCall(MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE)); 2904 PetscCall(MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY)); 2905 PetscCall(MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY)); 2906 PetscFunctionReturn(0); 2907 } 2908