#include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/
#include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
#include <../src/mat/impls/sbaij/seq/sbaij.h>
#include <petscblaslapack.h>

#if defined(PETSC_HAVE_ELEMENTAL)
PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
#endif
#if defined(PETSC_HAVE_SCALAPACK)
PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*);
#endif

/* This could be moved to matimpl.h */
static PetscErrorCode MatPreallocateWithMats_Private(Mat B, PetscInt nm, Mat X[], PetscBool symm[], PetscBool fill)
{
  Mat       preallocator;
  PetscInt  r,rstart,rend;
  PetscInt  bs,i,m,n,M,N;
  PetscBool cong = PETSC_TRUE;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidLogicalCollectiveInt(B,nm,2);
  for (i = 0; i < nm; i++) {
    PetscValidHeaderSpecific(X[i],MAT_CLASSID,3);
    PetscCall(PetscLayoutCompare(B->rmap,X[i]->rmap,&cong));
    PetscCheck(cong,PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"Not for different layouts");
  }
  PetscValidLogicalCollectiveBool(B,fill,5);
  PetscCall(MatGetBlockSize(B,&bs));
  PetscCall(MatGetSize(B,&M,&N));
  PetscCall(MatGetLocalSize(B,&m,&n));
  PetscCall(MatCreate(PetscObjectComm((PetscObject)B),&preallocator));
  PetscCall(MatSetType(preallocator,MATPREALLOCATOR));
  PetscCall(MatSetBlockSize(preallocator,bs));
  PetscCall(MatSetSizes(preallocator,m,n,M,N));
  PetscCall(MatSetUp(preallocator));
  PetscCall(MatGetOwnershipRange(preallocator,&rstart,&rend));
  for (r = rstart; r < rend; ++r) {
    PetscInt          ncols;
    const PetscInt    *row;
    const PetscScalar *vals;

    for (i = 0; i < nm; i++) {
      PetscCall(MatGetRow(X[i],r,&ncols,&row,&vals));
      PetscCall(MatSetValues(preallocator,1,&r,ncols,row,vals,INSERT_VALUES));
      if (symm && symm[i]) {
        PetscCall(MatSetValues(preallocator,ncols,row,1,&r,vals,INSERT_VALUES));
      }
      PetscCall(MatRestoreRow(X[i],r,&ncols,&row,&vals));
    }
  }
  PetscCall(MatAssemblyBegin(preallocator,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(preallocator,MAT_FINAL_ASSEMBLY));
  PetscCall(MatPreallocatorPreallocate(preallocator,fill,B));
  PetscCall(MatDestroy(&preallocator));
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Basic(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat      B;
  PetscInt r;

  PetscFunctionBegin;
  if (reuse != MAT_REUSE_MATRIX) {
    PetscBool symm = PETSC_TRUE,isdense;
    PetscInt  bs;

    PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B));
    PetscCall(MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N));
    PetscCall(MatSetType(B,newtype));
    PetscCall(MatGetBlockSize(A,&bs));
    PetscCall(MatSetBlockSize(B,bs));
    PetscCall(PetscLayoutSetUp(B->rmap));
    PetscCall(PetscLayoutSetUp(B->cmap));
    PetscCall(PetscObjectTypeCompareAny((PetscObject)B,&isdense,MATSEQDENSE,MATMPIDENSE,MATSEQDENSECUDA,""));
    if (!isdense) {
      PetscCall(MatGetRowUpperTriangular(A));
      PetscCall(MatPreallocateWithMats_Private(B,1,&A,&symm,PETSC_TRUE));
      PetscCall(MatRestoreRowUpperTriangular(A));
    } else {
      PetscCall(MatSetUp(B));
    }
  } else {
    B = *newmat;
    PetscCall(MatZeroEntries(B));
  }

  PetscCall(MatGetRowUpperTriangular(A));
  for (r = A->rmap->rstart; r < A->rmap->rend; r++) {
    PetscInt          ncols;
    const PetscInt    *row;
    const PetscScalar *vals;

    PetscCall(MatGetRow(A,r,&ncols,&row,&vals));
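    /* Insert the stored (upper-triangular) row, then mirror it into the transposed
       positions so that B holds the full, explicitly symmetric (or Hermitian) matrix */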
    PetscCall(MatSetValues(B,1,&r,ncols,row,vals,INSERT_VALUES));
#if defined(PETSC_USE_COMPLEX)
    if (A->hermitian) {
      PetscInt i;
      for (i = 0; i < ncols; i++) {
        PetscCall(MatSetValue(B,row[i],r,PetscConj(vals[i]),INSERT_VALUES));
      }
    } else {
      PetscCall(MatSetValues(B,ncols,row,1,&r,vals,INSERT_VALUES));
    }
#else
    PetscCall(MatSetValues(B,ncols,row,1,&r,vals,INSERT_VALUES));
#endif
    PetscCall(MatRestoreRow(A,r,&ncols,&row,&vals));
  }
  PetscCall(MatRestoreRowUpperTriangular(A));
  PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));

  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A,&B));
  } else {
    *newmat = B;
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ *aij = (Mat_MPISBAIJ*)mat->data;

  PetscFunctionBegin;
  PetscCall(MatStoreValues(aij->A));
  PetscCall(MatStoreValues(aij->B));
  PetscFunctionReturn(0);
}

PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ *aij = (Mat_MPISBAIJ*)mat->data;

  PetscFunctionBegin;
  PetscCall(MatRetrieveValues(aij->A));
  PetscCall(MatRetrieveValues(aij->B));
  PetscFunctionReturn(0);
}

#define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,orow,ocol) \
  { \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else                    *bap  = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    PetscCheck(a->nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp+_i+1,rp+_i,N-_i+1)); \
    PetscCall(PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1))); \
    PetscCall(PetscArrayzero(ap+bs2*_i,bs2)); \
    rp[_i]                      = bcol; \
    ap[bs2*_i + bs*cidx + ridx] = value; \
    A->nonzerostate++;\
    a_noinsert:; \
    ailen[brow] = nrow; \
  }
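/*
  The A macro above and the B macro below locate the target block column with a
  short binary search followed by a linear scan. Within a bs x bs block the values
  are stored column-major, hence the scalar offset bs2*_i + bs*cidx + ridx. If the
  block is absent and new nonzeros are allowed, the row is reallocated and all
  later entries are shifted up to make room.
*/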
#define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,orow,ocol) \
  { \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else                    *bap  = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    PetscCheck(b->nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp+_i+1,rp+_i,N-_i+1)); \
    PetscCall(PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1))); \
    PetscCall(PetscArrayzero(ap+bs2*_i,bs2)); \
    rp[_i]                      = bcol; \
    ap[bs2*_i + bs*cidx + ridx] = value; \
    B->nonzerostate++;\
    b_noinsert:; \
    bilen[brow] = nrow; \
  }

/* Only add/insert a(i,j) with i<=j (blocks).
   Any a(i,j) with i>j input by the user is ignored or generates an error
*/
PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ *baij        = (Mat_MPISBAIJ*)mat->data;
  MatScalar    value;
  PetscBool    roworiented  = baij->roworiented;
  PetscInt     i,j,row,col;
  PetscInt     rstart_orig  = mat->rmap->rstart;
  PetscInt     rend_orig    = mat->rmap->rend,cstart_orig = mat->cmap->rstart;
  PetscInt     cend_orig    = mat->cmap->rend,bs = mat->rmap->bs;

  /* Some variables required in the macros */
  Mat          A      = baij->A;
  Mat_SeqSBAIJ *a     = (Mat_SeqSBAIJ*)(A)->data;
  PetscInt     *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar    *aa    = a->a;

  Mat         B      = baij->B;
  Mat_SeqBAIJ *b     = (Mat_SeqBAIJ*)(B)->data;
  PetscInt    *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j;
  MatScalar   *ba    = b->a;

  PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt  low,high,t,ridx,cidx,bs2 = a->bs2;
  MatScalar *ap,*bap;

  /* for stash */
  PetscInt  n_loc, *in_loc = NULL;
  MatScalar *v_loc = NULL;

  PetscFunctionBegin;
  if (!baij->donotstash) {
    if (n > baij->n_loc) {
      PetscCall(PetscFree(baij->in_loc));
      PetscCall(PetscFree(baij->v_loc));
      PetscCall(PetscMalloc1(n,&baij->in_loc));
      PetscCall(PetscMalloc1(n,&baij->v_loc));

      baij->n_loc = n;
    }
    in_loc = baij->in_loc;
    v_loc  = baij->v_loc;
  }

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
    PetscCheck(im[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1);
    if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
      row = im[i] - rstart_orig;                     /* local row index */
      for (j=0; j<n; j++) {
        if (im[i]/bs > in[j]/bs) {
          if (a->ignore_ltriangular) {
            continue; /* ignore lower triangular blocks */
          } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
        }
        if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */
          col  = in[j] - cstart_orig;                    /* local col index */
          brow = row/bs; bcol = col/bs;
          if (brow > bcol) continue; /* ignore lower triangular blocks of A */
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,im[i],in[j]);
          /* PetscCall(MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv)); */
        } else if (in[j] < 0) {
          continue;
        } else { /* off-diag entry (B) */
          PetscCheck(in[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[j],mat->cmap->N-1);
          if (mat->was_assembled) {
            if (!baij->colmap) {
              PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
            }
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscTableFind(baij->colmap,in[j]/bs + 1,&col));
            col = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
              PetscCall(MatDisAssemble_MPISBAIJ(mat));
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
              B    = baij->B;
              b    = (Mat_SeqBAIJ*)(B)->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              ba   = b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,im[i],in[j]);
          /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */
        }
      }
    } else { /* off processor entry */
      PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        n_loc          = 0;
        for (j=0; j<n; j++) {
          if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
          in_loc[n_loc] = in[j];
          if (roworiented) {
            v_loc[n_loc] = v[i*n+j];
          } else {
            v_loc[n_loc] = v[j*m+i];
          }
          n_loc++;
        }
        PetscCall(MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE));
      }
    }
  }
  PetscFunctionReturn(0);
}
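/*
  A minimal usage sketch (not part of this file) for the scalar-insertion path
  above, assuming a tridiagonal matrix with block size 1; only entries with
  column >= row are set, since SBAIJ stores the upper triangle. Error checking
  (PetscCall) omitted for brevity.

    Mat      S;
    PetscInt i,rstart,rend,N = 100;

    MatCreate(PETSC_COMM_WORLD,&S);
    MatSetSizes(S,PETSC_DECIDE,PETSC_DECIDE,N,N);
    MatSetType(S,MATMPISBAIJ);
    MatMPISBAIJSetPreallocation(S,1,2,NULL,1,NULL);
    MatGetOwnershipRange(S,&rstart,&rend);
    for (i=rstart; i<rend; i++) {
      PetscScalar diag = 2.0,off = -1.0;
      PetscInt    ip1  = i+1;
      MatSetValues(S,1,&i,1,&i,&diag,INSERT_VALUES);
      if (ip1 < N) MatSetValues(S,1,&i,1,&ip1,&off,INSERT_VALUES);
    }
    MatAssemblyBegin(S,MAT_FINAL_ASSEMBLY);
    MatAssemblyEnd(S,MAT_FINAL_ASSEMBLY);
*/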
static inline PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
{
  Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
  PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
  PetscInt          *imax = a->imax,*ai = a->i,*ailen = a->ilen;
  PetscInt          *aj = a->j,nonew = a->nonew,bs2 = a->bs2,bs = A->rmap->bs;
  PetscBool         roworiented = a->roworiented;
  const PetscScalar *value = v;
  MatScalar         *ap,*aa = a->a,*bap;

  PetscFunctionBegin;
  if (col < row) {
    if (a->ignore_ltriangular) PetscFunctionReturn(0); /* ignore lower triangular block */
    else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
  }
  rp    = aj + ai[row];
  ap    = aa + bs2*ai[row];
  rmax  = imax[row];
  nrow  = ailen[row];
  value = v;
  low   = 0;
  high  = nrow;

  while (high-low > 7) {
    t = (low+high)/2;
    if (rp[t] > col) high = t;
    else             low  = t;
  }
  for (i=low; i<high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      bap = ap + bs2*i;
      if (roworiented) {
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] += *value++;
            }
          }
        } else {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] = *value++;
            }
          }
        }
      } else {
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++) {
            for (jj=0; jj<bs; jj++) {
              *bap++ += *value++;
            }
          }
        } else {
          for (ii=0; ii<bs; ii++) {
            for (jj=0; jj<bs; jj++) {
              *bap++ = *value++;
            }
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2;
  PetscCheck(nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero block at block index (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
  N = nrow++ - 1; high++;
  /* shift up all the later entries in this row */
  PetscCall(PetscArraymove(rp+i+1,rp+i,N-i+1));
  PetscCall(PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1)));
  rp[i] = col;
  bap   = ap + bs2*i;
  if (roworiented) {
    for (ii=0; ii<bs; ii++) {
      for (jj=ii; jj<bs2; jj+=bs) {
        bap[jj] = *value++;
      }
    }
  } else {
    for (ii=0; ii<bs; ii++) {
      for (jj=0; jj<bs; jj++) {
        *bap++ = *value++;
      }
    }
  }
  noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(0);
}
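/*
  Note on value ordering in the blocked inserts above and below: the block data in
  ap is stored column-major. With row-oriented input (the default), a bs=2 block
  passed as v = {a, b, c, d} for [[a, b], [c, d]] is scattered as bap[0]=a,
  bap[2]=b, bap[1]=c, bap[3]=d by the stride-bs inner loop; with column-oriented
  input the block can be copied contiguously.
*/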
/*
   This routine is exactly duplicated in mpibaij.c
*/
static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
{
  Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
  PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
  PetscInt          *imax = a->imax,*ai = a->i,*ailen = a->ilen;
  PetscInt          *aj = a->j,nonew = a->nonew,bs2 = a->bs2,bs = A->rmap->bs;
  PetscBool         roworiented = a->roworiented;
  const PetscScalar *value = v;
  MatScalar         *ap,*aa = a->a,*bap;

  PetscFunctionBegin;
  rp    = aj + ai[row];
  ap    = aa + bs2*ai[row];
  rmax  = imax[row];
  nrow  = ailen[row];
  low   = 0;
  high  = nrow;
  value = v;
  while (high-low > 7) {
    t = (low+high)/2;
    if (rp[t] > col) high = t;
    else             low  = t;
  }
  for (i=low; i<high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      bap = ap + bs2*i;
      if (roworiented) {
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] += *value++;
            }
          }
        } else {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] = *value++;
            }
          }
        }
      } else {
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++,value+=bs) {
            for (jj=0; jj<bs; jj++) {
              bap[jj] += value[jj];
            }
            bap += bs;
          }
        } else {
          for (ii=0; ii<bs; ii++,value+=bs) {
            for (jj=0; jj<bs; jj++) {
              bap[jj] = value[jj];
            }
            bap += bs;
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2;
  PetscCheck(nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero block at global block index (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
  N = nrow++ - 1; high++;
  /* shift up all the later entries in this row */
  PetscCall(PetscArraymove(rp+i+1,rp+i,N-i+1));
  PetscCall(PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1)));
  rp[i] = col;
  bap   = ap + bs2*i;
  if (roworiented) {
    for (ii=0; ii<bs; ii++) {
      for (jj=ii; jj<bs2; jj+=bs) {
        bap[jj] = *value++;
      }
    }
  } else {
    for (ii=0; ii<bs; ii++) {
      for (jj=0; jj<bs; jj++) {
        *bap++ = *value++;
      }
    }
  }
  noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(0);
}
/*
   This routine could be optimized by removing the need for the block copy below and
   passing stride information to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ()
*/
PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray      = baij->barray;
  PetscBool       roworiented  = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular;
  PetscInt        i,j,ii,jj,row,col,rstart = baij->rstartbs;
  PetscInt        rend = baij->rendbs,cstart = baij->cstartbs,stepval;
  PetscInt        cend = baij->cendbs,bs = mat->rmap->bs,bs2 = baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    PetscCall(PetscMalloc1(bs2,&barray));
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
    PetscCheck(im[i] < baij->Mbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT,im[i],baij->Mbs-1);
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        if (im[i] > in[j]) {
          if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
          else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
        }
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*) v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*) v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++ = *value++;
            }
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {
          col = in[j] - cstart;
          PetscCall(MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]));
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < baij->Nbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT,in[j],baij->Nbs-1);
          if (mat->was_assembled) {
            if (!baij->colmap) {
              PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
            }

#if defined(PETSC_USE_DEBUG)
#if defined(PETSC_USE_CTABLE)
            { PetscInt data;
              PetscCall(PetscTableFind(baij->colmap,in[j]+1,&data));
              PetscCheck((data - 1) % bs == 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            PetscCheck((baij->colmap[in[j]] - 1) % bs == 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscTableFind(baij->colmap,in[j]+1,&col));
            col = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              PetscCall(MatDisAssemble_MPISBAIJ(mat));
              col = in[j];
            }
          } else col = in[j];
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]));
        }
      }
    } else {
      PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i));
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
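/*
  A minimal usage sketch (not part of this file) of the blocked path above,
  assuming block size 2 and a row-oriented 2x2 block inserted at block position
  (ib,jb) with ib <= jb; error checking omitted for brevity:

    PetscScalar blk[4] = {4.0, 1.0,
                          1.0, 4.0};
    PetscInt    ib = 0, jb = 1;
    MatSetValuesBlocked(S,1,&ib,1,&jb,blk,INSERT_VALUES);
*/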
PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
  PetscInt     bs = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt     bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          col = idxn[j] - bscstart;
          PetscCall(MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j));
        } else {
          if (!baij->colmap) {
            PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
          }
#if defined(PETSC_USE_CTABLE)
          PetscCall(PetscTableFind(baij->colmap,idxn[j]/bs+1,&data));
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col = data + idxn[j]%bs;
            PetscCall(MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j));
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
  PetscReal    sum[2],*lnorm2;

  PetscFunctionBegin;
  if (baij->size == 1) {
    PetscCall(MatNorm(baij->A,type,norm));
  } else {
    if (type == NORM_FROBENIUS) {
      PetscCall(PetscMalloc1(2,&lnorm2));
      PetscCall(MatNorm(baij->A,type,lnorm2));
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++; /* square of norm(A) */
      PetscCall(MatNorm(baij->B,type,lnorm2));
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--; /* square of norm(B) */
      PetscCall(MPIU_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat)));
      *norm = PetscSqrtReal(sum[0] + 2*sum[1]);
      PetscCall(PetscFree(lnorm2));
    } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
      Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ*)baij->A->data;
      Mat_SeqBAIJ  *bmat = (Mat_SeqBAIJ*)baij->B->data;
      PetscReal    *rsum,*rsum2,vabs;
      PetscInt     *jj,*garray = baij->garray,rstart = baij->rstartbs,nz;
      PetscInt     brow,bcol,col,bs = baij->A->rmap->bs,row,grow,gcol,mbs = amat->mbs;
      MatScalar    *v;

      PetscCall(PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2));
      PetscCall(PetscArrayzero(rsum,mat->cmap->N));
      /* Amat */
      v = amat->a; jj = amat->j;
      for (brow=0; brow<mbs; brow++) {
        grow = bs*(rstart + brow);
        nz   = amat->i[brow+1] - amat->i[brow];
        for (bcol=0; bcol<nz; bcol++) {
          gcol = bs*(rstart + *jj); jj++;
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              vabs            = PetscAbsScalar(*v); v++;
              rsum[gcol+col] += vabs;
              /* non-diagonal block */
              if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
            }
          }
        }
        PetscCall(PetscLogFlops(nz*bs*bs));
      }
      /* Bmat */
      v = bmat->a; jj = bmat->j;
      for (brow=0; brow<mbs; brow++) {
        grow = bs*(rstart + brow);
        nz   = bmat->i[brow+1] - bmat->i[brow];
        for (bcol=0; bcol<nz; bcol++) {
          gcol = bs*garray[*jj]; jj++;
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              vabs            = PetscAbsScalar(*v); v++;
              rsum[gcol+col] += vabs;
              rsum[grow+row] += vabs;
            }
          }
        }
        PetscCall(PetscLogFlops(nz*bs*bs));
      }
      PetscCall(MPIU_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat)));
      *norm = 0.0;
      for (col=0; col<mat->cmap->N; col++) {
        if (rsum2[col] > *norm) *norm = rsum2[col];
      }
      PetscCall(PetscFree2(rsum,rsum2));
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
  }
  PetscFunctionReturn(0);
}
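/*
  Rationale for the Frobenius reduction above: each process stores only the upper
  triangle, with the strictly off-diagonal coupling in B. The SeqSBAIJ norm of the
  diagonal block A already accounts for the mirrored entries within that block,
  while every entry of B also represents its unstored transpose, so

      ||M||_F^2 = sum over ranks of ( ||A_loc||_F^2 + 2*||B_loc||_F^2 ),

  which is exactly PetscSqrtReal(sum[0] + 2*sum[1]) after the Allreduce.
*/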
PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
  PetscInt     nstash,reallocs;

  PetscFunctionBegin;
  if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  PetscCall(MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range));
  PetscCall(MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs));
  PetscCall(MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs));
  PetscCall(PetscInfo(mat,"Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs));
  PetscCall(MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs));
  PetscCall(PetscInfo(mat,"Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs));
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
  Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ*)baij->A->data;
  PetscInt     i,j,rstart,ncols,flg,bs2 = baij->bs2;
  PetscInt     *row,*col;
  PetscBool    other_disassembled;
  PetscMPIInt  n;
  PetscBool    r1,r2,r3;
  MatScalar    *val;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!baij->donotstash && !mat->nooffprocentries) {
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg));
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the row-oriented flags to PETSC_FALSE, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg));
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        PetscCall(MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->bstash));

    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  PetscCall(MatAssemblyBegin(baij->A,mode));
  PetscCall(MatAssemblyEnd(baij->A,mode));
  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this step
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    PetscCall(MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) {
      PetscCall(MatDisAssemble_MPISBAIJ(mat));
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    PetscCall(MatSetUpMultiply_MPISBAIJ(mat)); /* setup Mvctx and sMvctx */
  }
  PetscCall(MatAssemblyBegin(baij->B,mode));
  PetscCall(MatAssemblyEnd(baij->B,mode));

  PetscCall(PetscFree2(baij->rowvalues,baij->rowindices));

  baij->rowvalues = NULL;

  /* if no new nonzero locations are allowed in the matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat)));
  }
  PetscFunctionReturn(0);
}
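/*
  A minimal sketch (not part of this file) of the assembly protocol implemented by
  the two routines above. Any rank may set any upper-triangular entry; off-process
  values travel through mat->stash / mat->bstash between Begin and End. Error
  checking omitted for brevity:

    MatSetValues(S,1,&i,1,&jcol,&val,ADD_VALUES);   possibly an off-process row
    MatAssemblyBegin(S,MAT_FINAL_ASSEMBLY);         starts the stash scatter
    MatAssemblyEnd(S,MAT_FINAL_ASSEMBLY);           drains the stash and compresses
*/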
extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
#include <petscdraw.h>
static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
  PetscInt          bs    = mat->rmap->bs;
  PetscMPIInt       rank  = baij->rank;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer,&format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank));
      PetscCall(MatGetInfo(mat,MAT_LOCAL,&info));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(double)info.memory));
      PetscCall(MatGetInfo(baij->A,MAT_LOCAL,&info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used));
      PetscCall(MatGetInfo(baij->B,MAT_LOCAL,&info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(baij->Mvctx,viewer));
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscCall(PetscViewerASCIIPrintf(viewer,"  block size is %" PetscInt_FMT "\n",bs));
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer,0,&draw));
    PetscCall(PetscDrawIsNull(draw,&isnull));
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    Mat_SeqSBAIJ *Aloc;
    Mat_SeqBAIJ  *Bloc;
    PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar    *a;
    const char   *matname;

    /* Should this be the same type as mat? */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)mat),&A));
    if (rank == 0) {
      PetscCall(MatSetSizes(A,M,N,M,N));
    } else {
      PetscCall(MatSetSizes(A,0,0,M,N));
    }
    PetscCall(MatSetType(A,MATMPISBAIJ));
    PetscCall(MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL));
    PetscCall(MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE));
    PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)A));

    /* copy over the A part */
    Aloc = (Mat_SeqSBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    PetscCall(PetscMalloc1(bs,&rvals));

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          PetscCall(MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    /* copy over the B part */
    Bloc = (Mat_SeqBAIJ*)baij->B->data;
    ai   = Bloc->i; aj = Bloc->j; a = Bloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          PetscCall(MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    PetscCall(PetscFree(rvals));
    PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
    PetscCall(PetscObjectGetName((PetscObject)mat,&matname));
    if (rank == 0) {
      PetscCall(PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,matname));
      PetscCall(MatView_SeqSBAIJ(((Mat_MPISBAIJ*)(A->data))->A,sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
    PetscCall(PetscViewerFlush(viewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(0);
}

/* Used for both MPIBAIJ and MPISBAIJ matrices */
#define MatView_MPISBAIJ_Binary MatView_MPIBAIJ_Binary

PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
{
  PetscBool iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
  if (iascii || isdraw || issocket) {
    PetscCall(MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer));
  } else if (isbinary) {
    PetscCall(MatView_MPISBAIJ_Binary(mat,viewer));
  }
  PetscFunctionReturn(0);
}
PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT,mat->rmap->N,mat->cmap->N);
#endif
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCall(MatStashDestroy_Private(&mat->bstash));
  PetscCall(MatDestroy(&baij->A));
  PetscCall(MatDestroy(&baij->B));
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscTableDestroy(&baij->colmap));
#else
  PetscCall(PetscFree(baij->colmap));
#endif
  PetscCall(PetscFree(baij->garray));
  PetscCall(VecDestroy(&baij->lvec));
  PetscCall(VecScatterDestroy(&baij->Mvctx));
  PetscCall(VecDestroy(&baij->slvec0));
  PetscCall(VecDestroy(&baij->slvec0b));
  PetscCall(VecDestroy(&baij->slvec1));
  PetscCall(VecDestroy(&baij->slvec1a));
  PetscCall(VecDestroy(&baij->slvec1b));
  PetscCall(VecScatterDestroy(&baij->sMvctx));
  PetscCall(PetscFree2(baij->rowvalues,baij->rowindices));
  PetscCall(PetscFree(baij->barray));
  PetscCall(PetscFree(baij->hd));
  PetscCall(VecDestroy(&baij->diag));
  PetscCall(VecDestroy(&baij->bb1));
  PetscCall(VecDestroy(&baij->xx1));
#if defined(PETSC_USE_REAL_MAT_SINGLE)
  PetscCall(PetscFree(baij->setvaluescopy));
#endif
  PetscCall(PetscFree(baij->in_loc));
  PetscCall(PetscFree(baij->v_loc));
  PetscCall(PetscFree(baij->rangebs));
  PetscCall(PetscFree(mat->data));

  PetscCall(PetscObjectChangeTypeName((PetscObject)mat,NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocationCSR_C",NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_elemental_C",NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_scalapack_C",NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpiaij_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpibaij_C",NULL));
  PetscFunctionReturn(0);
}

PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
  PetscInt          mbs = a->mbs,bs = A->rmap->bs;
  PetscScalar       *from;
  const PetscScalar *x;

  PetscFunctionBegin;
  /* diagonal part */
  PetscCall((*a->A->ops->mult)(a->A,xx,a->slvec1a));
  PetscCall(VecSet(a->slvec1b,0.0));

  /* subdiagonal part */
  PetscCheck(a->B->ops->multhermitiantranspose,PetscObjectComm((PetscObject)a->B),PETSC_ERR_SUP,"Not for type %s",((PetscObject)a->B)->type_name);
  PetscCall((*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b));

  /* copy x into the vec slvec0 */
  PetscCall(VecGetArray(a->slvec0,&from));
  PetscCall(VecGetArrayRead(xx,&x));

  PetscCall(PetscArraycpy(from,x,bs*mbs));
  PetscCall(VecRestoreArray(a->slvec0,&from));
  PetscCall(VecRestoreArrayRead(xx,&x));

  PetscCall(VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD));
  PetscCall(VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD));
  /* superdiagonal part */
  PetscCall((*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy));
  PetscFunctionReturn(0);
}
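/*
  How the symmetric mat-vec above and below works: only the upper triangle is
  stored, so y = M x is assembled from three pieces,

      y_loc = A_loc x_loc               (stored diagonal block)
            + B x_ghost                 (stored superdiagonal blocks)
            + contributions of B^T x    (the unstored subdiagonal blocks),

  with the B^T (or B^H) products exchanged between ranks through the combined
  scatter sMvctx acting on the work vectors slvec0/slvec1.
*/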
PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
  PetscInt          mbs = a->mbs,bs = A->rmap->bs;
  PetscScalar       *from;
  const PetscScalar *x;

  PetscFunctionBegin;
  /* diagonal part */
  PetscCall((*a->A->ops->mult)(a->A,xx,a->slvec1a));
  PetscCall(VecSet(a->slvec1b,0.0));

  /* subdiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B,xx,a->slvec0b));

  /* copy x into the vec slvec0 */
  PetscCall(VecGetArray(a->slvec0,&from));
  PetscCall(VecGetArrayRead(xx,&x));

  PetscCall(PetscArraycpy(from,x,bs*mbs));
  PetscCall(VecRestoreArray(a->slvec0,&from));
  PetscCall(VecRestoreArrayRead(xx,&x));

  PetscCall(VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD));
  PetscCall(VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD));
  /* superdiagonal part */
  PetscCall((*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy));
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultAdd_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
  PetscInt          mbs = a->mbs,bs = A->rmap->bs;
  PetscScalar       *from,zero = 0.0;
  const PetscScalar *x;

  PetscFunctionBegin;
  /* diagonal part */
  PetscCall((*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a));
  PetscCall(VecSet(a->slvec1b,zero));

  /* subdiagonal part */
  PetscCheck(a->B->ops->multhermitiantranspose,PetscObjectComm((PetscObject)a->B),PETSC_ERR_SUP,"Not for type %s",((PetscObject)a->B)->type_name);
  PetscCall((*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b));

  /* copy x into the vec slvec0 */
  PetscCall(VecGetArray(a->slvec0,&from));
  PetscCall(VecGetArrayRead(xx,&x));
  PetscCall(PetscArraycpy(from,x,bs*mbs));
  PetscCall(VecRestoreArray(a->slvec0,&from));

  PetscCall(VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD));
  PetscCall(VecRestoreArrayRead(xx,&x));
  PetscCall(VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD));

  /* superdiagonal part */
  PetscCall((*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz));
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
  PetscInt          mbs = a->mbs,bs = A->rmap->bs;
  PetscScalar       *from,zero = 0.0;
  const PetscScalar *x;

  PetscFunctionBegin;
  /* diagonal part */
  PetscCall((*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a));
  PetscCall(VecSet(a->slvec1b,zero));

  /* subdiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B,xx,a->slvec0b));

  /* copy x into the vec slvec0 */
  PetscCall(VecGetArray(a->slvec0,&from));
  PetscCall(VecGetArrayRead(xx,&x));
  PetscCall(PetscArraycpy(from,x,bs*mbs));
  PetscCall(VecRestoreArray(a->slvec0,&from));

  PetscCall(VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD));
  PetscCall(VecRestoreArrayRead(xx,&x));
  PetscCall(VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD));

  /* superdiagonal part */
  PetscCall((*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz));
  PetscFunctionReturn(0);
}
/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;

  PetscFunctionBegin;
  /* PetscCheck(a->rmap->N == a->cmap->N,PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
  PetscCall(MatGetDiagonal(a->A,v));
  PetscFunctionReturn(0);
}

PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;

  PetscFunctionBegin;
  PetscCall(MatScale(a->A,aa));
  PetscCall(MatScale(a->B,aa));
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data;
  PetscScalar  *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscInt     bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt     nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
  PetscInt     *cmap,*idx_p,cstart = mat->rstartbs;

  PetscFunctionBegin;
  PetscCheck(!mat->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
       allocate enough space to hold information from the longest row.
    */
    Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
    Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt     max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
      if (max < tmp) max = tmp;
    }
    PetscCall(PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices));
  }

  PetscCheck(row >= brstart && row < brend,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
  lrow = row - brstart; /* local row index */

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = NULL; pvB = NULL;}
  if (!idx) {pcA = NULL; if (!v) pcB = NULL;}
  PetscCall((*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB));
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
      }
    } else {
      if (idx) *idx = NULL;
      if (v)   *v   = NULL;
    }
  }
  *nz = nztot;
  PetscCall((*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB));
  PetscFunctionReturn(0);
}
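/*
  A minimal usage sketch (not part of this file): because only the upper triangle
  is stored, callers must bracket row queries with the GetRowUpperTriangular pair;
  error checking omitted for brevity:

    const PetscInt    *cols;
    const PetscScalar *vals;
    PetscInt          ncols;

    MatGetRowUpperTriangular(S);
    MatGetRow(S,row,&ncols,&cols,&vals);
    ... use cols/vals ...
    MatRestoreRow(S,row,&ncols,&cols,&vals);
    MatRestoreRowUpperTriangular(S);
*/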
PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;

  PetscFunctionBegin;
  PetscCheck(baij->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;

  PetscFunctionBegin;
  aA->getrow_utriangular = PETSC_TRUE;
  PetscFunctionReturn(0);
}

PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;

  PetscFunctionBegin;
  aA->getrow_utriangular = PETSC_FALSE;
  PetscFunctionReturn(0);
}

PetscErrorCode MatConjugate_MPISBAIJ(Mat mat)
{
  PetscFunctionBegin;
  if (PetscDefined(USE_COMPLEX)) {
    Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)mat->data;

    PetscCall(MatConjugate(a->A));
    PetscCall(MatConjugate(a->B));
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;

  PetscFunctionBegin;
  PetscCall(MatRealPart(a->A));
  PetscCall(MatRealPart(a->B));
  PetscFunctionReturn(0);
}

PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;

  PetscFunctionBegin;
  PetscCall(MatImaginaryPart(a->A));
  PetscCall(MatImaginaryPart(a->B));
  PetscFunctionReturn(0);
}

/* Check if isrow is a subset of iscol_local, called by MatCreateSubMatrix_MPISBAIJ()
   Input: isrow       - distributed (parallel),
          iscol_local - locally owned (seq)
*/
PetscErrorCode ISEqual_private(IS isrow,IS iscol_local,PetscBool *flg)
{
  PetscInt       sz1,sz2,*a1,*a2,i,j,k,nmatch;
  const PetscInt *ptr1,*ptr2;

  PetscFunctionBegin;
  PetscCall(ISGetLocalSize(isrow,&sz1));
  PetscCall(ISGetLocalSize(iscol_local,&sz2));
  if (sz1 > sz2) {
    *flg = PETSC_FALSE;
    PetscFunctionReturn(0);
  }

  PetscCall(ISGetIndices(isrow,&ptr1));
  PetscCall(ISGetIndices(iscol_local,&ptr2));

  PetscCall(PetscMalloc1(sz1,&a1));
  PetscCall(PetscMalloc1(sz2,&a2));
  PetscCall(PetscArraycpy(a1,ptr1,sz1));
  PetscCall(PetscArraycpy(a2,ptr2,sz2));
  PetscCall(PetscSortInt(sz1,a1));
  PetscCall(PetscSortInt(sz2,a2));

  nmatch = 0;
  k      = 0;
  for (i=0; i<sz1; i++) {
    for (j=k; j<sz2; j++) {
      if (a1[i] == a2[j]) {
        k = j; nmatch++;
        break;
      }
    }
  }
  PetscCall(ISRestoreIndices(isrow,&ptr1));
  PetscCall(ISRestoreIndices(iscol_local,&ptr2));
  PetscCall(PetscFree(a1));
  PetscCall(PetscFree(a2));
  if (nmatch < sz1) {
    *flg = PETSC_FALSE;
  } else {
    *flg = PETSC_TRUE;
  }
  PetscFunctionReturn(0);
}
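/*
  Extracting a submatrix of an SBAIJ matrix only stays symmetric when the row and
  column index sets coincide; MatCreateSubMatrix_MPISBAIJ() below enforces this.
  A minimal sketch (not part of this file), with error checking omitted:

    IS  is;      the same sorted index set is passed for rows and columns
    Mat sub;
    MatCreateSubMatrix(S,is,is,MAT_INITIAL_MATRIX,&sub);
*/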
PetscErrorCode MatCreateSubMatrix_MPISBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
{
  IS        iscol_local;
  PetscInt  csize;
  PetscBool isequal;

  PetscFunctionBegin;
  PetscCall(ISGetLocalSize(iscol,&csize));
  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local));
    PetscCheck(iscol_local,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
  } else {
    PetscBool issorted;

    PetscCall(ISAllGather(iscol,&iscol_local));
    PetscCall(ISEqual_private(isrow,iscol_local,&isequal));
    PetscCall(ISSorted(iscol_local,&issorted));
    PetscCheck(isequal && issorted,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isrow and be sorted");
  }

  /* now call MatCreateSubMatrix_MPIBAIJ() */
  PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat));
  if (call == MAT_INITIAL_MATRIX) {
    PetscCall(PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local));
    PetscCall(ISDestroy(&iscol_local));
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data;

  PetscFunctionBegin;
  PetscCall(MatZeroEntries(l->A));
  PetscCall(MatZeroEntries(l->B));
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
  Mat            A  = a->A,B = a->B;
  PetscLogDouble isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = (PetscReal)matin->rmap->bs;

  PetscCall(MatGetInfo(A,MAT_LOCAL,info));

  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;

  PetscCall(MatGetInfo(B,MAT_LOCAL,info));

  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}
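/*
  A minimal usage sketch (not part of this file) for the option handler below,
  with error checking omitted; the SBAIJ-specific options control how values in
  the (unstored) lower triangle are treated:

    MatSetOption(S,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);   silently drop them
    MatSetOption(S,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);    allow MatGetRow()
*/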
PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_SUBMAT_SINGLEIS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    MatCheckPreallocated(A,1);
    PetscCall(MatSetOption(a->A,op,flg));
    PetscCall(MatSetOption(a->B,op,flg));
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A,1);
    a->roworiented = flg;

    PetscCall(MatSetOption(a->A,op,flg));
    PetscCall(MatSetOption(a->B,op,flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_SORTED_FULL:
    PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op]));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    a->ht_flag = flg;
    break;
  case MAT_HERMITIAN:
    MatCheckPreallocated(A,1);
    PetscCall(MatSetOption(a->A,op,flg));
#if defined(PETSC_USE_COMPLEX)
    if (flg) { /* need different mat-vec ops */
      A->ops->mult             = MatMult_MPISBAIJ_Hermitian;
      A->ops->multadd          = MatMultAdd_MPISBAIJ_Hermitian;
      A->ops->multtranspose    = NULL;
      A->ops->multtransposeadd = NULL;
      A->symmetric             = PETSC_FALSE;
    }
#endif
    break;
  case MAT_SPD:
  case MAT_SYMMETRIC:
    MatCheckPreallocated(A,1);
    PetscCall(MatSetOption(a->A,op,flg));
#if defined(PETSC_USE_COMPLEX)
    if (flg) { /* restore to use default mat-vec ops */
      A->ops->mult             = MatMult_MPISBAIJ;
      A->ops->multadd          = MatMultAdd_MPISBAIJ;
      A->ops->multtranspose    = MatMult_MPISBAIJ;
      A->ops->multtransposeadd = MatMultAdd_MPISBAIJ;
    }
#endif
    break;
  case MAT_STRUCTURALLY_SYMMETRIC:
    MatCheckPreallocated(A,1);
    PetscCall(MatSetOption(a->A,op,flg));
    break;
  case MAT_SYMMETRY_ETERNAL:
    PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
    PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op]));
    break;
  case MAT_IGNORE_LOWER_TRIANGULAR:
    aA->ignore_ltriangular = flg;
    break;
  case MAT_ERROR_LOWER_TRIANGULAR:
    aA->ignore_ltriangular = flg;
    break;
  case MAT_GETROW_UPPERTRIANGULAR:
    aA->getrow_utriangular = flg;
    break;
  default:
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
{
  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscCall(MatDuplicate(A,MAT_COPY_VALUES,B));
  } else if (reuse == MAT_REUSE_MATRIX) {
    PetscCall(MatCopy(A,*B,SAME_NONZERO_PATTERN));
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
  Mat          a     = baij->A, b = baij->B;
  PetscInt     nv,m,n;
  PetscBool    flg;

  PetscFunctionBegin;
  if (ll != rr) {
    PetscCall(VecEqual(ll,rr,&flg));
    PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be the same");
  }
  if (!ll) PetscFunctionReturn(0);

  PetscCall(MatGetLocalSize(mat,&m,&n));
  PetscCheck(m == n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local sizes %" PetscInt_FMT " %" PetscInt_FMT " must be the same",m,n);

  PetscCall(VecGetLocalSize(rr,&nv));
  PetscCheck(nv == n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");

  PetscCall(VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD));

  /* left diagonalscale the off-diagonal part */
  PetscCall((*b->ops->diagonalscale)(b,ll,NULL));

  /* scale the diagonal part */
  PetscCall((*a->ops->diagonalscale)(a,ll,rr));

  /* right diagonalscale the off-diagonal part */
  PetscCall(VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD));
  PetscCall((*b->ops->diagonalscale)(b,NULL,baij->lvec));
  PetscFunctionReturn(0);
}
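/*
  Why MatDiagonalScale above insists on ll == rr: for a symmetric M only the
  two-sided scaling D M D preserves symmetry, e.g.

    MatDiagonalScale(S,d,d);    scales entry (i,j) by d[i]*d[j]

  whereas distinct left and right vectors would destroy the stored-upper-triangle
  representation.
*/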
PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A) 1596 { 1597 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1598 1599 PetscFunctionBegin; 1600 PetscCall(MatSetUnfactored(a->A)); 1601 PetscFunctionReturn(0); 1602 } 1603 1604 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*); 1605 1606 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool *flag) 1607 { 1608 Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data; 1609 Mat a,b,c,d; 1610 PetscBool flg; 1611 1612 PetscFunctionBegin; 1613 a = matA->A; b = matA->B; 1614 c = matB->A; d = matB->B; 1615 1616 PetscCall(MatEqual(a,c,&flg)); 1617 if (flg) { 1618 PetscCall(MatEqual(b,d,&flg)); 1619 } 1620 PetscCall(MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A))); 1621 PetscFunctionReturn(0); 1622 } 1623 1624 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str) 1625 { 1626 PetscBool isbaij; 1627 1628 PetscFunctionBegin; 1629 PetscCall(PetscObjectTypeCompareAny((PetscObject)B,&isbaij,MATSEQSBAIJ,MATMPISBAIJ,"")); 1630 PetscCheck(isbaij,PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"Not for matrix type %s",((PetscObject)B)->type_name); 1631 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1632 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1633 PetscCall(MatGetRowUpperTriangular(A)); 1634 PetscCall(MatCopy_Basic(A,B,str)); 1635 PetscCall(MatRestoreRowUpperTriangular(A)); 1636 } else { 1637 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1638 Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)B->data; 1639 1640 PetscCall(MatCopy(a->A,b->A,str)); 1641 PetscCall(MatCopy(a->B,b->B,str)); 1642 } 1643 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 1644 PetscFunctionReturn(0); 1645 } 1646 1647 PetscErrorCode MatSetUp_MPISBAIJ(Mat A) 1648 { 1649 PetscFunctionBegin; 1650 PetscCall(MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL)); 1651 PetscFunctionReturn(0); 1652 } 1653 1654 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1655 { 1656 Mat_MPISBAIJ *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data; 1657 PetscBLASInt bnz,one=1; 1658 Mat_SeqSBAIJ *xa,*ya; 1659 Mat_SeqBAIJ *xb,*yb; 1660 1661 PetscFunctionBegin; 1662 if (str == SAME_NONZERO_PATTERN) { 1663 PetscScalar alpha = a; 1664 xa = (Mat_SeqSBAIJ*)xx->A->data; 1665 ya = (Mat_SeqSBAIJ*)yy->A->data; 1666 PetscCall(PetscBLASIntCast(xa->nz,&bnz)); 1667 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one)); 1668 xb = (Mat_SeqBAIJ*)xx->B->data; 1669 yb = (Mat_SeqBAIJ*)yy->B->data; 1670 PetscCall(PetscBLASIntCast(xb->nz,&bnz)); 1671 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one)); 1672 PetscCall(PetscObjectStateIncrease((PetscObject)Y)); 1673 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1674 PetscCall(MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE)); 1675 PetscCall(MatAXPY_Basic(Y,a,X,str)); 1676 PetscCall(MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE)); 1677 } else { 1678 Mat B; 1679 PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs; 1680 PetscCheck(bs == X->rmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size"); 1681 PetscCall(MatGetRowUpperTriangular(X)); 1682 PetscCall(MatGetRowUpperTriangular(Y)); 1683 PetscCall(PetscMalloc1(yy->A->rmap->N,&nnz_d)); 1684 PetscCall(PetscMalloc1(yy->B->rmap->N,&nnz_o)); 1685 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y),&B)); 1686 
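  /* different nonzero patterns: assemble a new matrix B preallocated for the union pattern of Y and X,
     fill it with a*X + Y, then replace Y's internals with B's via MatHeaderMerge() below */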
PetscCall(PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name)); 1687 PetscCall(MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N)); 1688 PetscCall(MatSetBlockSizesFromMats(B,Y,Y)); 1689 PetscCall(MatSetType(B,MATMPISBAIJ)); 1690 PetscCall(MatAXPYGetPreallocation_SeqSBAIJ(yy->A,xx->A,nnz_d)); 1691 PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o)); 1692 PetscCall(MatMPISBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o)); 1693 PetscCall(MatAXPY_BasicWithPreallocation(B,Y,a,X,str)); 1694 PetscCall(MatHeaderMerge(Y,&B)); 1695 PetscCall(PetscFree(nnz_d)); 1696 PetscCall(PetscFree(nnz_o)); 1697 PetscCall(MatRestoreRowUpperTriangular(X)); 1698 PetscCall(MatRestoreRowUpperTriangular(Y)); 1699 } 1700 PetscFunctionReturn(0); 1701 } 1702 1703 PetscErrorCode MatCreateSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[]) 1704 { 1705 PetscInt i; 1706 PetscBool flg; 1707 1708 PetscFunctionBegin; 1709 PetscCall(MatCreateSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B)); /* B[] are sbaij matrices */ 1710 for (i=0; i<n; i++) { 1711 PetscCall(ISEqual(irow[i],icol[i],&flg)); 1712 if (!flg) { 1713 PetscCall(MatSeqSBAIJZeroOps_Private(*B[i])); 1714 } 1715 } 1716 PetscFunctionReturn(0); 1717 } 1718 1719 PetscErrorCode MatShift_MPISBAIJ(Mat Y,PetscScalar a) 1720 { 1721 Mat_MPISBAIJ *maij = (Mat_MPISBAIJ*)Y->data; 1722 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ*)maij->A->data; 1723 1724 PetscFunctionBegin; 1725 if (!Y->preallocated) { 1726 PetscCall(MatMPISBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL)); 1727 } else if (!aij->nz) { 1728 PetscInt nonew = aij->nonew; 1729 PetscCall(MatSeqSBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL)); 1730 aij->nonew = nonew; 1731 } 1732 PetscCall(MatShift_Basic(Y,a)); 1733 PetscFunctionReturn(0); 1734 } 1735 1736 PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A,PetscBool *missing,PetscInt *d) 1737 { 1738 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1739 1740 PetscFunctionBegin; 1741 PetscCheck(A->rmap->n == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 1742 PetscCall(MatMissingDiagonal(a->A,missing,d)); 1743 if (d) { 1744 PetscInt rstart; 1745 PetscCall(MatGetOwnershipRange(A,&rstart,NULL)); 1746 *d += rstart/A->rmap->bs; 1747 1748 } 1749 PetscFunctionReturn(0); 1750 } 1751 1752 PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a) 1753 { 1754 PetscFunctionBegin; 1755 *a = ((Mat_MPISBAIJ*)A->data)->A; 1756 PetscFunctionReturn(0); 1757 } 1758 1759 /* -------------------------------------------------------------------*/ 1760 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ, 1761 MatGetRow_MPISBAIJ, 1762 MatRestoreRow_MPISBAIJ, 1763 MatMult_MPISBAIJ, 1764 /* 4*/ MatMultAdd_MPISBAIJ, 1765 MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */ 1766 MatMultAdd_MPISBAIJ, 1767 NULL, 1768 NULL, 1769 NULL, 1770 /* 10*/ NULL, 1771 NULL, 1772 NULL, 1773 MatSOR_MPISBAIJ, 1774 MatTranspose_MPISBAIJ, 1775 /* 15*/ MatGetInfo_MPISBAIJ, 1776 MatEqual_MPISBAIJ, 1777 MatGetDiagonal_MPISBAIJ, 1778 MatDiagonalScale_MPISBAIJ, 1779 MatNorm_MPISBAIJ, 1780 /* 20*/ MatAssemblyBegin_MPISBAIJ, 1781 MatAssemblyEnd_MPISBAIJ, 1782 MatSetOption_MPISBAIJ, 1783 MatZeroEntries_MPISBAIJ, 1784 /* 24*/ NULL, 1785 NULL, 1786 NULL, 1787 NULL, 1788 NULL, 1789 /* 29*/ MatSetUp_MPISBAIJ, 1790 NULL, 1791 NULL, 1792 MatGetDiagonalBlock_MPISBAIJ, 1793 NULL, 1794 /* 34*/ MatDuplicate_MPISBAIJ, 1795 NULL, 1796 NULL, 1797 NULL, 1798 NULL, 1799 /* 39*/ MatAXPY_MPISBAIJ, 1800 
                                       MatCreateSubMatrices_MPISBAIJ,
                                       MatIncreaseOverlap_MPISBAIJ,
                                       MatGetValues_MPISBAIJ,
                                       MatCopy_MPISBAIJ,
                                /* 44*/ NULL,
                                       MatScale_MPISBAIJ,
                                       MatShift_MPISBAIJ,
                                       NULL,
                                       NULL,
                                /* 49*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /* 54*/ NULL,
                                       NULL,
                                       MatSetUnfactored_MPISBAIJ,
                                       NULL,
                                       MatSetValuesBlocked_MPISBAIJ,
                                /* 59*/ MatCreateSubMatrix_MPISBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /* 64*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /* 69*/ MatGetRowMaxAbs_MPISBAIJ,
                                       NULL,
                                       MatConvert_MPISBAIJ_Basic,
                                       NULL,
                                       NULL,
                                /* 74*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /* 79*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatLoad_MPISBAIJ,
                                /* 84*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /* 89*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /* 94*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /* 99*/ NULL,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPISBAIJ,
                                       NULL,
                                /*104*/ NULL,
                                       MatRealPart_MPISBAIJ,
                                       MatImaginaryPart_MPISBAIJ,
                                       MatGetRowUpperTriangular_MPISBAIJ,
                                       MatRestoreRowUpperTriangular_MPISBAIJ,
                                /*109*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatMissingDiagonal_MPISBAIJ,
                                /*114*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*119*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*124*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*129*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*139*/ MatSetBlockSizes_Default,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*144*/ MatCreateMPIMatConcatenateSeqMat_MPISBAIJ,
                                       NULL,
                                       NULL,
                                       NULL
};

PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
{
  Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)B->data;
  PetscInt     i,mbs,Mbs;
  PetscMPIInt  size;

  PetscFunctionBegin;
  PetscCall(MatSetBlockSize(B,PetscAbs(bs)));
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  PetscCall(PetscLayoutGetBlockSize(B->rmap,&bs));
  PetscCheck(B->rmap->N <= B->cmap->N,PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"MPISBAIJ matrix cannot have more rows %" PetscInt_FMT " than columns %" PetscInt_FMT,B->rmap->N,B->cmap->N);
  PetscCheck(B->rmap->n <= B->cmap->n,PETSC_COMM_SELF,PETSC_ERR_SUP,"MPISBAIJ matrix cannot have more local rows %" PetscInt_FMT " than columns %" PetscInt_FMT,B->rmap->n,B->cmap->n);

  mbs = B->rmap->n/bs;
  Mbs = B->rmap->N/bs;
  PetscCheck(mbs*bs == B->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT,B->rmap->n,bs);

  B->rmap->bs = bs;
  b->bs2      = bs*bs;
  b->mbs      = mbs;
  b->Mbs      = Mbs;
  b->nbs      = B->cmap->n/bs;
  b->Nbs      = B->cmap->N/bs;

  for (i=0; i<=b->size; i++) {
    b->rangebs[i] = B->rmap->range[i]/bs;
  }
  b->rstartbs = B->rmap->rstart/bs;
  b->rendbs   = B->rmap->rend/bs;

  b->cstartbs = B->cmap->rstart/bs;
  b->cendbs   = B->cmap->rend/bs;

#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscTableDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));
PetscCall(VecDestroy(&b->slvec0)); 1953 PetscCall(VecDestroy(&b->slvec0b)); 1954 PetscCall(VecDestroy(&b->slvec1)); 1955 PetscCall(VecDestroy(&b->slvec1a)); 1956 PetscCall(VecDestroy(&b->slvec1b)); 1957 PetscCall(VecScatterDestroy(&b->sMvctx)); 1958 1959 /* Because the B will have been resized we simply destroy it and create a new one each time */ 1960 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&size)); 1961 PetscCall(MatDestroy(&b->B)); 1962 PetscCall(MatCreate(PETSC_COMM_SELF,&b->B)); 1963 PetscCall(MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0)); 1964 PetscCall(MatSetType(b->B,MATSEQBAIJ)); 1965 PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)b->B)); 1966 1967 if (!B->preallocated) { 1968 PetscCall(MatCreate(PETSC_COMM_SELF,&b->A)); 1969 PetscCall(MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n)); 1970 PetscCall(MatSetType(b->A,MATSEQSBAIJ)); 1971 PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)b->A)); 1972 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash)); 1973 } 1974 1975 PetscCall(MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz)); 1976 PetscCall(MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz)); 1977 1978 B->preallocated = PETSC_TRUE; 1979 B->was_assembled = PETSC_FALSE; 1980 B->assembled = PETSC_FALSE; 1981 PetscFunctionReturn(0); 1982 } 1983 1984 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 1985 { 1986 PetscInt m,rstart,cend; 1987 PetscInt i,j,d,nz,bd, nz_max=0,*d_nnz=NULL,*o_nnz=NULL; 1988 const PetscInt *JJ =NULL; 1989 PetscScalar *values=NULL; 1990 PetscBool roworiented = ((Mat_MPISBAIJ*)B->data)->roworiented; 1991 PetscBool nooffprocentries; 1992 1993 PetscFunctionBegin; 1994 PetscCheck(bs >= 1,PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %" PetscInt_FMT,bs); 1995 PetscCall(PetscLayoutSetBlockSize(B->rmap,bs)); 1996 PetscCall(PetscLayoutSetBlockSize(B->cmap,bs)); 1997 PetscCall(PetscLayoutSetUp(B->rmap)); 1998 PetscCall(PetscLayoutSetUp(B->cmap)); 1999 PetscCall(PetscLayoutGetBlockSize(B->rmap,&bs)); 2000 m = B->rmap->n/bs; 2001 rstart = B->rmap->rstart/bs; 2002 cend = B->cmap->rend/bs; 2003 2004 PetscCheck(!ii[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %" PetscInt_FMT,ii[0]); 2005 PetscCall(PetscMalloc2(m,&d_nnz,m,&o_nnz)); 2006 for (i=0; i<m; i++) { 2007 nz = ii[i+1] - ii[i]; 2008 PetscCheck(nz >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT,i,nz); 2009 /* count the ones on the diagonal and above, split into diagonal and off diagonal portions. 
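     Here i is the local block row (global block row r = i+rstart) and jj holds block column indices:
     entries with block column below r are strictly lower triangular and are counted only in bd so they
     can be dropped; block columns in [r,cend) count toward d (the diagonal block); the remaining
     nz-d-bd entries go to o_nnz.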
  */
    JJ = jj + ii[i];
    bd = 0;
    for (j=0; j<nz; j++) {
      if (*JJ >= i + rstart) break;
      JJ++;
      bd++;
    }
    d = 0;
    for (; j<nz; j++) {
      if (*JJ++ >= cend) break;
      d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nz - d - bd;
    nz       = nz - bd;
    nz_max   = PetscMax(nz_max,nz);
  }
  PetscCall(MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz));
  PetscCall(MatSetOption(B,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE));
  PetscCall(PetscFree2(d_nnz,o_nnz));

  values = (PetscScalar*)V;
  if (!values) {
    PetscCall(PetscCalloc1(bs*bs*nz_max,&values));
  }
  for (i=0; i<m; i++) {
    PetscInt          row   = i + rstart;
    PetscInt          ncols = ii[i+1] - ii[i];
    const PetscInt    *icols = jj + ii[i];
    if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
      const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
      PetscCall(MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES));
    } else { /* block ordering does not match so we can only insert one block at a time */
      PetscInt j;
      for (j=0; j<ncols; j++) {
        const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
        PetscCall(MatSetValuesBlocked_MPISBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES));
      }
    }
  }

  if (!V) PetscCall(PetscFree(values));
  nooffprocentries    = B->nooffprocentries;
  B->nooffprocentries = PETSC_TRUE;
  PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
  B->nooffprocentries = nooffprocentries;

  PetscCall(MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
  PetscFunctionReturn(0);
}

/*MC
   MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
   based on the block compressed sparse row format. Only the upper triangular portion of the "diagonal" portion of
   the matrix is stored.

   For complex numbers this matrix is by default symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric
   call MatSetOption(mat, MAT_HERMITIAN, PETSC_TRUE);

   Options Database Keys:
. -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()

   Notes:
   The number of rows in the matrix must be less than or equal to the number of columns. Similarly, the number of rows in the
   diagonal portion of the matrix on each process must be less than or equal to the number of columns.
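
   Example (a minimal creation sketch; comm, M, bs, d_nz, and o_nz are placeholders for caller-supplied values, and
   error checking with PetscCall() is omitted):

.vb
      Mat A;
      MatCreate(comm,&A);
      MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,M);
      MatSetType(A,MATMPISBAIJ);
      MatMPISBAIJSetPreallocation(A,bs,d_nz,NULL,o_nz,NULL);
.ve

   Only entries on or above the block diagonal should then be set with MatSetValues() or MatSetValuesBlocked(); in
   complex builds, MatSetOption(A, MAT_HERMITIAN, PETSC_TRUE) may then be used as described above.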
2076 2077 Level: beginner 2078 2079 .seealso: `MatCreateBAIJ()`, `MATSEQSBAIJ`, `MatType` 2080 M*/ 2081 2082 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B) 2083 { 2084 Mat_MPISBAIJ *b; 2085 PetscBool flg = PETSC_FALSE; 2086 2087 PetscFunctionBegin; 2088 PetscCall(PetscNewLog(B,&b)); 2089 B->data = (void*)b; 2090 PetscCall(PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps))); 2091 2092 B->ops->destroy = MatDestroy_MPISBAIJ; 2093 B->ops->view = MatView_MPISBAIJ; 2094 B->assembled = PETSC_FALSE; 2095 B->insertmode = NOT_SET_VALUES; 2096 2097 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank)); 2098 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size)); 2099 2100 /* build local table of row and column ownerships */ 2101 PetscCall(PetscMalloc1(b->size+2,&b->rangebs)); 2102 2103 /* build cache for off array entries formed */ 2104 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash)); 2105 2106 b->donotstash = PETSC_FALSE; 2107 b->colmap = NULL; 2108 b->garray = NULL; 2109 b->roworiented = PETSC_TRUE; 2110 2111 /* stuff used in block assembly */ 2112 b->barray = NULL; 2113 2114 /* stuff used for matrix vector multiply */ 2115 b->lvec = NULL; 2116 b->Mvctx = NULL; 2117 b->slvec0 = NULL; 2118 b->slvec0b = NULL; 2119 b->slvec1 = NULL; 2120 b->slvec1a = NULL; 2121 b->slvec1b = NULL; 2122 b->sMvctx = NULL; 2123 2124 /* stuff for MatGetRow() */ 2125 b->rowindices = NULL; 2126 b->rowvalues = NULL; 2127 b->getrowactive = PETSC_FALSE; 2128 2129 /* hash table stuff */ 2130 b->ht = NULL; 2131 b->hd = NULL; 2132 b->ht_size = 0; 2133 b->ht_flag = PETSC_FALSE; 2134 b->ht_fact = 0; 2135 b->ht_total_ct = 0; 2136 b->ht_insert_ct = 0; 2137 2138 /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */ 2139 b->ijonly = PETSC_FALSE; 2140 2141 b->in_loc = NULL; 2142 b->v_loc = NULL; 2143 b->n_loc = 0; 2144 2145 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ)); 2146 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ)); 2147 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ)); 2148 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ)); 2149 #if defined(PETSC_HAVE_ELEMENTAL) 2150 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_elemental_C",MatConvert_MPISBAIJ_Elemental)); 2151 #endif 2152 #if defined(PETSC_HAVE_SCALAPACK) 2153 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_scalapack_C",MatConvert_SBAIJ_ScaLAPACK)); 2154 #endif 2155 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpiaij_C",MatConvert_MPISBAIJ_Basic)); 2156 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpibaij_C",MatConvert_MPISBAIJ_Basic)); 2157 2158 B->symmetric = PETSC_TRUE; 2159 B->structurally_symmetric = PETSC_TRUE; 2160 B->symmetric_set = PETSC_TRUE; 2161 B->structurally_symmetric_set = PETSC_TRUE; 2162 B->symmetric_eternal = PETSC_TRUE; 2163 #if defined(PETSC_USE_COMPLEX) 2164 B->hermitian = PETSC_FALSE; 2165 B->hermitian_set = PETSC_FALSE; 2166 #else 2167 B->hermitian = PETSC_TRUE; 2168 B->hermitian_set = PETSC_TRUE; 2169 #endif 2170 2171 PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ)); 2172 PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix 1","Mat"); 2173 
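  /* optional hash-table assembly path: roughly, the table is sized as a multiple (ht_fact > 1.0) of the
     expected number of entries so that collisions stay rare */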
  PetscCall(PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",flg,&flg,NULL));
  if (flg) {
    PetscReal fact = 1.39;
    PetscCall(MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE));
    PetscCall(PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL));
    if (fact <= 1.0) fact = 1.39;
    PetscCall(MatMPIBAIJSetHashTableFactor(B,fact));
    PetscCall(PetscInfo(B,"Hash table Factor used %5.2g\n",(double)fact));
  }
  PetscOptionsEnd();
  PetscFunctionReturn(0);
}

/*MC
   MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.

   This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
   and MATMPISBAIJ otherwise.

   Options Database Keys:
. -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()

   Level: beginner

.seealso: `MatCreateSBAIJ()`, `MATSEQSBAIJ`, `MATMPISBAIJ`
M*/

/*@C
   MatMPISBAIJSetPreallocation - For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on Mat

   Input Parameters:
+  B - the matrix
.  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
       blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
.  d_nz - number of block nonzeros per block row in the diagonal portion of the local
          submatrix (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the upper triangular and diagonal part of
           the diagonal portion of each block row of the local submatrix
           (possibly different for each block row) or NULL. If you plan to factor the matrix you must leave room
           for the diagonal entry and set a value even if it is zero.
.  o_nz - number of block nonzeros per block row in the off-diagonal portion of the local
          submatrix (same for all local rows).
-  o_nnz - array containing the number of block nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix that is right of the diagonal
           (possibly different for each block row) or NULL.

   Options Database Keys:
+   -mat_no_unroll - uses code that does not unroll the loops in the
                     block calculations (much slower)
-   -mat_block_size - size of the blocks to use

   Notes:
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   If the *_nnz parameter is given then the *_nz parameter is ignored.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).
Set 2244 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 2245 memory allocation. Likewise, specify preallocated storage for the 2246 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2247 2248 You can call MatGetInfo() to get information on how effective the preallocation was; 2249 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 2250 You can also run with the option -info and look for messages with the string 2251 malloc in them to see if additional memory allocation was needed. 2252 2253 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2254 the figure below we depict these three local rows and all columns (0-11). 2255 2256 .vb 2257 0 1 2 3 4 5 6 7 8 9 10 11 2258 -------------------------- 2259 row 3 |. . . d d d o o o o o o 2260 row 4 |. . . d d d o o o o o o 2261 row 5 |. . . d d d o o o o o o 2262 -------------------------- 2263 .ve 2264 2265 Thus, any entries in the d locations are stored in the d (diagonal) 2266 submatrix, and any entries in the o locations are stored in the 2267 o (off-diagonal) submatrix. Note that the d matrix is stored in 2268 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 2269 2270 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 2271 plus the diagonal part of the d matrix, 2272 and o_nz should indicate the number of block nonzeros per row in the o matrix 2273 2274 In general, for PDE problems in which most nonzeros are near the diagonal, 2275 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2276 or you will get TERRIBLE performance; see the users' manual chapter on 2277 matrices. 2278 2279 Level: intermediate 2280 2281 .seealso: `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `PetscSplitOwnership()` 2282 @*/ 2283 PetscErrorCode MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2284 { 2285 PetscFunctionBegin; 2286 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2287 PetscValidType(B,1); 2288 PetscValidLogicalCollectiveInt(B,bs,2); 2289 PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz)); 2290 PetscFunctionReturn(0); 2291 } 2292 2293 /*@C 2294 MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format 2295 (block compressed row). For good matrix assembly performance 2296 the user should preallocate the matrix storage by setting the parameters 2297 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2298 performance can be increased by more than a factor of 50. 2299 2300 Collective 2301 2302 Input Parameters: 2303 + comm - MPI communicator 2304 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 2305 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 2306 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2307 This value should be the same as the local size used in creating the 2308 y vector for the matrix-vector product y = Ax. 2309 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 2310 This value should be the same as the local size used in creating the 2311 x vector for the matrix-vector product y = Ax. 2312 . 
M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz - number of block nonzeros per block row in the diagonal portion of the local
          submatrix (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the upper triangular portion of
           the diagonal portion of each block row of the local submatrix
           (possibly different for each block row) or NULL.
           If you plan to factor the matrix you must leave room for the diagonal entry and
           set its value even if it is zero.
.  o_nz - number of block nonzeros per block row in the off-diagonal portion of the local
          submatrix (same for all local rows).
-  o_nnz - array containing the number of block nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or NULL.

   Output Parameter:
.  A - the matrix

   Options Database Keys:
+   -mat_no_unroll - uses code that does not unroll the loops in the
                     block calculations (much slower)
.   -mat_block_size - size of the blocks to use
-   -mat_mpi - use the parallel matrix data structures even on one processor
               (defaults to using the SeqSBAIJ format on one processor)

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]

   Notes:
   The number of rows and columns must be divisible by blocksize.
   This matrix type does not support complex Hermitian operations.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   If the *_nnz parameter is given then the *_nz parameter is ignored.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both). Set
   d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
   memory allocation. Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          --------------------------
   row 3  |. . . d d d o o o o  o  o
   row 4  |. . . d d d o o o o  o  o
   row 5  |. . . d d d o o o o  o  o
          --------------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix. Note that the d matrix is stored in
   MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2381 2382 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 2383 plus the diagonal part of the d matrix, 2384 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2385 In general, for PDE problems in which most nonzeros are near the diagonal, 2386 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2387 or you will get TERRIBLE performance; see the users' manual chapter on 2388 matrices. 2389 2390 Level: intermediate 2391 2392 .seealso: `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()` 2393 @*/ 2394 2395 PetscErrorCode MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2396 { 2397 PetscMPIInt size; 2398 2399 PetscFunctionBegin; 2400 PetscCall(MatCreate(comm,A)); 2401 PetscCall(MatSetSizes(*A,m,n,M,N)); 2402 PetscCallMPI(MPI_Comm_size(comm,&size)); 2403 if (size > 1) { 2404 PetscCall(MatSetType(*A,MATMPISBAIJ)); 2405 PetscCall(MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz)); 2406 } else { 2407 PetscCall(MatSetType(*A,MATSEQSBAIJ)); 2408 PetscCall(MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz)); 2409 } 2410 PetscFunctionReturn(0); 2411 } 2412 2413 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2414 { 2415 Mat mat; 2416 Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data; 2417 PetscInt len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs; 2418 PetscScalar *array; 2419 2420 PetscFunctionBegin; 2421 *newmat = NULL; 2422 2423 PetscCall(MatCreate(PetscObjectComm((PetscObject)matin),&mat)); 2424 PetscCall(MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N)); 2425 PetscCall(MatSetType(mat,((PetscObject)matin)->type_name)); 2426 PetscCall(PetscLayoutReference(matin->rmap,&mat->rmap)); 2427 PetscCall(PetscLayoutReference(matin->cmap,&mat->cmap)); 2428 2429 mat->factortype = matin->factortype; 2430 mat->preallocated = PETSC_TRUE; 2431 mat->assembled = PETSC_TRUE; 2432 mat->insertmode = NOT_SET_VALUES; 2433 2434 a = (Mat_MPISBAIJ*)mat->data; 2435 a->bs2 = oldmat->bs2; 2436 a->mbs = oldmat->mbs; 2437 a->nbs = oldmat->nbs; 2438 a->Mbs = oldmat->Mbs; 2439 a->Nbs = oldmat->Nbs; 2440 2441 a->size = oldmat->size; 2442 a->rank = oldmat->rank; 2443 a->donotstash = oldmat->donotstash; 2444 a->roworiented = oldmat->roworiented; 2445 a->rowindices = NULL; 2446 a->rowvalues = NULL; 2447 a->getrowactive = PETSC_FALSE; 2448 a->barray = NULL; 2449 a->rstartbs = oldmat->rstartbs; 2450 a->rendbs = oldmat->rendbs; 2451 a->cstartbs = oldmat->cstartbs; 2452 a->cendbs = oldmat->cendbs; 2453 2454 /* hash table stuff */ 2455 a->ht = NULL; 2456 a->hd = NULL; 2457 a->ht_size = 0; 2458 a->ht_flag = oldmat->ht_flag; 2459 a->ht_fact = oldmat->ht_fact; 2460 a->ht_total_ct = 0; 2461 a->ht_insert_ct = 0; 2462 2463 PetscCall(PetscArraycpy(a->rangebs,oldmat->rangebs,a->size+2)); 2464 if (oldmat->colmap) { 2465 #if defined(PETSC_USE_CTABLE) 2466 PetscCall(PetscTableCreateCopy(oldmat->colmap,&a->colmap)); 2467 #else 2468 PetscCall(PetscMalloc1(a->Nbs,&a->colmap)); 2469 PetscCall(PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt))); 2470 PetscCall(PetscArraycpy(a->colmap,oldmat->colmap,a->Nbs)); 2471 #endif 2472 } else a->colmap = NULL; 2473 2474 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2475 PetscCall(PetscMalloc1(len,&a->garray)); 2476 
PetscCall(PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt))); 2477 PetscCall(PetscArraycpy(a->garray,oldmat->garray,len)); 2478 } else a->garray = NULL; 2479 2480 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash)); 2481 PetscCall(VecDuplicate(oldmat->lvec,&a->lvec)); 2482 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec)); 2483 PetscCall(VecScatterCopy(oldmat->Mvctx,&a->Mvctx)); 2484 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx)); 2485 2486 PetscCall(VecDuplicate(oldmat->slvec0,&a->slvec0)); 2487 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0)); 2488 PetscCall(VecDuplicate(oldmat->slvec1,&a->slvec1)); 2489 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1)); 2490 2491 PetscCall(VecGetLocalSize(a->slvec1,&nt)); 2492 PetscCall(VecGetArray(a->slvec1,&array)); 2493 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a)); 2494 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b)); 2495 PetscCall(VecRestoreArray(a->slvec1,&array)); 2496 PetscCall(VecGetArray(a->slvec0,&array)); 2497 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b)); 2498 PetscCall(VecRestoreArray(a->slvec0,&array)); 2499 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0)); 2500 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1)); 2501 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b)); 2502 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a)); 2503 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b)); 2504 2505 /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */ 2506 PetscCall(PetscObjectReference((PetscObject)oldmat->sMvctx)); 2507 a->sMvctx = oldmat->sMvctx; 2508 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx)); 2509 2510 PetscCall(MatDuplicate(oldmat->A,cpvalues,&a->A)); 2511 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A)); 2512 PetscCall(MatDuplicate(oldmat->B,cpvalues,&a->B)); 2513 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B)); 2514 PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist)); 2515 *newmat = mat; 2516 PetscFunctionReturn(0); 2517 } 2518 2519 /* Used for both MPIBAIJ and MPISBAIJ matrices */ 2520 #define MatLoad_MPISBAIJ_Binary MatLoad_MPIBAIJ_Binary 2521 2522 PetscErrorCode MatLoad_MPISBAIJ(Mat mat,PetscViewer viewer) 2523 { 2524 PetscBool isbinary; 2525 2526 PetscFunctionBegin; 2527 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary)); 2528 PetscCheck(isbinary,PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)mat)->type_name); 2529 PetscCall(MatLoad_MPISBAIJ_Binary(mat,viewer)); 2530 PetscFunctionReturn(0); 2531 } 2532 2533 /*XXXXX@ 2534 MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2535 2536 Input Parameters: 2537 . mat - the matrix 2538 . 
fact - factor 2539 2540 Not Collective on Mat, each process can have a different hash factor 2541 2542 Level: advanced 2543 2544 Notes: 2545 This can also be set by the command line option: -mat_use_hash_table fact 2546 2547 .seealso: `MatSetOption()` 2548 @XXXXX*/ 2549 2550 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[]) 2551 { 2552 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 2553 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(a->B)->data; 2554 PetscReal atmp; 2555 PetscReal *work,*svalues,*rvalues; 2556 PetscInt i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol; 2557 PetscMPIInt rank,size; 2558 PetscInt *rowners_bs,dest,count,source; 2559 PetscScalar *va; 2560 MatScalar *ba; 2561 MPI_Status stat; 2562 2563 PetscFunctionBegin; 2564 PetscCheck(!idx,PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov"); 2565 PetscCall(MatGetRowMaxAbs(a->A,v,NULL)); 2566 PetscCall(VecGetArray(v,&va)); 2567 2568 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size)); 2569 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank)); 2570 2571 bs = A->rmap->bs; 2572 mbs = a->mbs; 2573 Mbs = a->Mbs; 2574 ba = b->a; 2575 bi = b->i; 2576 bj = b->j; 2577 2578 /* find ownerships */ 2579 rowners_bs = A->rmap->range; 2580 2581 /* each proc creates an array to be distributed */ 2582 PetscCall(PetscCalloc1(bs*Mbs,&work)); 2583 2584 /* row_max for B */ 2585 if (rank != size-1) { 2586 for (i=0; i<mbs; i++) { 2587 ncols = bi[1] - bi[0]; bi++; 2588 brow = bs*i; 2589 for (j=0; j<ncols; j++) { 2590 bcol = bs*(*bj); 2591 for (kcol=0; kcol<bs; kcol++) { 2592 col = bcol + kcol; /* local col index */ 2593 col += rowners_bs[rank+1]; /* global col index */ 2594 for (krow=0; krow<bs; krow++) { 2595 atmp = PetscAbsScalar(*ba); ba++; 2596 row = brow + krow; /* local row index */ 2597 if (PetscRealPart(va[row]) < atmp) va[row] = atmp; 2598 if (work[col] < atmp) work[col] = atmp; 2599 } 2600 } 2601 bj++; 2602 } 2603 } 2604 2605 /* send values to its owners */ 2606 for (dest=rank+1; dest<size; dest++) { 2607 svalues = work + rowners_bs[dest]; 2608 count = rowners_bs[dest+1]-rowners_bs[dest]; 2609 PetscCallMPI(MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A))); 2610 } 2611 } 2612 2613 /* receive values */ 2614 if (rank) { 2615 rvalues = work; 2616 count = rowners_bs[rank+1]-rowners_bs[rank]; 2617 for (source=0; source<rank; source++) { 2618 PetscCallMPI(MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat)); 2619 /* process values */ 2620 for (i=0; i<count; i++) { 2621 if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i]; 2622 } 2623 } 2624 } 2625 2626 PetscCall(VecRestoreArray(v,&va)); 2627 PetscCall(PetscFree(work)); 2628 PetscFunctionReturn(0); 2629 } 2630 2631 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2632 { 2633 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2634 PetscInt mbs=mat->mbs,bs=matin->rmap->bs; 2635 PetscScalar *x,*ptr,*from; 2636 Vec bb1; 2637 const PetscScalar *b; 2638 2639 PetscFunctionBegin; 2640 PetscCheck(its > 0 && lits > 0,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive",its,lits); 2641 PetscCheck(bs <= 1,PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2642 2643 if (flag == SOR_APPLY_UPPER) { 2644 PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx)); 2645 
PetscFunctionReturn(0); 2646 } 2647 2648 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2649 if (flag & SOR_ZERO_INITIAL_GUESS) { 2650 PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx)); 2651 its--; 2652 } 2653 2654 PetscCall(VecDuplicate(bb,&bb1)); 2655 while (its--) { 2656 2657 /* lower triangular part: slvec0b = - B^T*xx */ 2658 PetscCall((*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b)); 2659 2660 /* copy xx into slvec0a */ 2661 PetscCall(VecGetArray(mat->slvec0,&ptr)); 2662 PetscCall(VecGetArray(xx,&x)); 2663 PetscCall(PetscArraycpy(ptr,x,bs*mbs)); 2664 PetscCall(VecRestoreArray(mat->slvec0,&ptr)); 2665 2666 PetscCall(VecScale(mat->slvec0,-1.0)); 2667 2668 /* copy bb into slvec1a */ 2669 PetscCall(VecGetArray(mat->slvec1,&ptr)); 2670 PetscCall(VecGetArrayRead(bb,&b)); 2671 PetscCall(PetscArraycpy(ptr,b,bs*mbs)); 2672 PetscCall(VecRestoreArray(mat->slvec1,&ptr)); 2673 2674 /* set slvec1b = 0 */ 2675 PetscCall(VecSet(mat->slvec1b,0.0)); 2676 2677 PetscCall(VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD)); 2678 PetscCall(VecRestoreArray(xx,&x)); 2679 PetscCall(VecRestoreArrayRead(bb,&b)); 2680 PetscCall(VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD)); 2681 2682 /* upper triangular part: bb1 = bb1 - B*x */ 2683 PetscCall((*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1)); 2684 2685 /* local diagonal sweep */ 2686 PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx)); 2687 } 2688 PetscCall(VecDestroy(&bb1)); 2689 } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 2690 PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx)); 2691 } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 2692 PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx)); 2693 } else if (flag & SOR_EISENSTAT) { 2694 Vec xx1; 2695 PetscBool hasop; 2696 const PetscScalar *diag; 2697 PetscScalar *sl,scale = (omega - 2.0)/omega; 2698 PetscInt i,n; 2699 2700 if (!mat->xx1) { 2701 PetscCall(VecDuplicate(bb,&mat->xx1)); 2702 PetscCall(VecDuplicate(bb,&mat->bb1)); 2703 } 2704 xx1 = mat->xx1; 2705 bb1 = mat->bb1; 2706 2707 PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx)); 2708 2709 if (!mat->diag) { 2710 /* this is wrong for same matrix with new nonzero values */ 2711 PetscCall(MatCreateVecs(matin,&mat->diag,NULL)); 2712 PetscCall(MatGetDiagonal(matin,mat->diag)); 2713 } 2714 PetscCall(MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop)); 2715 2716 if (hasop) { 2717 PetscCall(MatMultDiagonalBlock(matin,xx,bb1)); 2718 PetscCall(VecAYPX(mat->slvec1a,scale,bb)); 2719 } else { 2720 /* 2721 These two lines are replaced by code that may be a bit faster for a good compiler 2722 PetscCall(VecPointwiseMult(mat->slvec1a,mat->diag,xx)); 2723 PetscCall(VecAYPX(mat->slvec1a,scale,bb)); 2724 */ 2725 PetscCall(VecGetArray(mat->slvec1a,&sl)); 2726 PetscCall(VecGetArrayRead(mat->diag,&diag)); 2727 PetscCall(VecGetArrayRead(bb,&b)); 2728 PetscCall(VecGetArray(xx,&x)); 2729 PetscCall(VecGetLocalSize(xx,&n)); 2730 if (omega == 1.0) { 2731 for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i]; 2732 PetscCall(PetscLogFlops(2.0*n)); 2733 } else { 2734 for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i]; 2735 PetscCall(PetscLogFlops(3.0*n)); 2736 } 2737 PetscCall(VecRestoreArray(mat->slvec1a,&sl)); 
      PetscCall(VecRestoreArrayRead(mat->diag,&diag));
      PetscCall(VecRestoreArrayRead(bb,&b));
      PetscCall(VecRestoreArray(xx,&x));
    }

    /* multiply off-diagonal portion of matrix */
    PetscCall(VecSet(mat->slvec1b,0.0));
    PetscCall((*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b));
    PetscCall(VecGetArray(mat->slvec0,&from));
    PetscCall(VecGetArray(xx,&x));
    PetscCall(PetscArraycpy(from,x,bs*mbs));
    PetscCall(VecRestoreArray(mat->slvec0,&from));
    PetscCall(VecRestoreArray(xx,&x));
    PetscCall(VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD));
    PetscCall(VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD));
    PetscCall((*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a));

    /* local sweep */
    PetscCall((*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1));
    PetscCall(VecAXPY(xx,1.0,xx1));
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
  PetscFunctionReturn(0);
}

/*@
   MatCreateMPISBAIJWithArrays - creates an MPI SBAIJ matrix using arrays that contain the local rows
   in standard CSR format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  bs - the block size, only a block size of 1 is supported
.  m - number of local rows (cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
       it calculated if N is given). For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that block row of the matrix
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
   The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
   thus you CANNOT change the matrix entries by changing the values of a[] after you have
   called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

   The i and j indices are 0 based, and the i indices are offsets into the local j array.
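
   Example (a minimal single-process sketch with bs = 1; the symmetric matrix [[2 1],[1 3]] is passed via its
   upper triangle, and error checking with PetscCall() is omitted):

.vb
      Mat         mat;
      PetscInt    i[] = {0,2,3};
      PetscInt    j[] = {0,1,1};
      PetscScalar a[] = {2.0,1.0,3.0};

      MatCreateMPISBAIJWithArrays(PETSC_COMM_WORLD,1,2,2,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,&mat);
.ve

   Here row 0 holds the entries (0,0)=2 and (0,1)=1 and row 1 holds (1,1)=3; entries below the diagonal need not
   be supplied, as they would be ignored.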
.seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`
@*/
PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscFunctionBegin;
  PetscCheck(!i[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  PetscCheck(m >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCall(MatCreate(comm,mat));
  PetscCall(MatSetSizes(*mat,m,n,M,N));
  PetscCall(MatSetType(*mat,MATMPISBAIJ));
  PetscCall(MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a));
  PetscFunctionReturn(0);
}

/*@C
   MatMPISBAIJSetPreallocationCSR - Creates a sparse parallel matrix in SBAIJ format using the given nonzero structure and (optional) numerical values

   Collective

   Input Parameters:
+  B - the matrix
.  bs - the block size
.  i - the indices into j for the start of each local row (starts with zero)
.  j - the column indices for each local row (starts with zero); these must be sorted within each row
-  v - optional values in the matrix

   Level: advanced

   Notes:
   Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries
   and usually the numerical values as well.

   Any entries below the diagonal are ignored.

.seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MPIAIJ`
@*/
PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscFunctionBegin;
  PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));
  PetscFunctionReturn(0);
}

PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscInt    m,N,i,rstart,nnz,Ii,bs,cbs;
  PetscInt    *indx;
  PetscScalar *values;

  PetscFunctionBegin;
  PetscCall(MatGetSize(inmat,&m,&N));
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inmat->data;
    PetscInt     *dnz,*onz,mbs,Nbs,nbs;
    PetscInt     *bindx,rmax=a->rmax,j;
    PetscMPIInt  rank,size;

    PetscCall(MatGetBlockSizes(inmat,&bs,&cbs));
    mbs = m/bs; Nbs = N/cbs;
    if (n == PETSC_DECIDE) {
      PetscCall(PetscSplitOwnershipBlock(comm,cbs,&n,&N));
    }
    nbs = n/cbs;

    PetscCall(PetscMalloc1(rmax,&bindx));
    MatPreallocateBegin(comm,mbs,nbs,dnz,onz); /* inline function, output __end and __rstart are used below */

    PetscCallMPI(MPI_Comm_rank(comm,&rank));
    PetscCallMPI(MPI_Comm_size(comm,&size));
    if (rank == size-1) {
      /* Check sum(nbs) = Nbs */
      PetscCheck(__end == Nbs,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT,__end,Nbs);
    }

    rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */
    PetscCall(MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE));
    for (i=0; i<mbs; i++) {
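      /* MatGetRow_SeqSBAIJ returns point-wise (non-blocked) indices: each block contributes bs consecutive
         point columns, so taking every bs-th index and dividing by bs recovers the block columns used in
         the preallocation count below */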
PetscCall(MatGetRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL)); /* non-blocked nnz and indx */ 2872 nnz = nnz/bs; 2873 for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs; 2874 PetscCall(MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz)); 2875 PetscCall(MatRestoreRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL)); 2876 } 2877 PetscCall(MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE)); 2878 PetscCall(PetscFree(bindx)); 2879 2880 PetscCall(MatCreate(comm,outmat)); 2881 PetscCall(MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE)); 2882 PetscCall(MatSetBlockSizes(*outmat,bs,cbs)); 2883 PetscCall(MatSetType(*outmat,MATSBAIJ)); 2884 PetscCall(MatSeqSBAIJSetPreallocation(*outmat,bs,0,dnz)); 2885 PetscCall(MatMPISBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz)); 2886 MatPreallocateEnd(dnz,onz); 2887 } 2888 2889 /* numeric phase */ 2890 PetscCall(MatGetBlockSizes(inmat,&bs,&cbs)); 2891 PetscCall(MatGetOwnershipRange(*outmat,&rstart,NULL)); 2892 2893 PetscCall(MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE)); 2894 for (i=0; i<m; i++) { 2895 PetscCall(MatGetRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values)); 2896 Ii = i + rstart; 2897 PetscCall(MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES)); 2898 PetscCall(MatRestoreRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values)); 2899 } 2900 PetscCall(MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE)); 2901 PetscCall(MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY)); 2902 PetscCall(MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY)); 2903 PetscFunctionReturn(0); 2904 } 2905