1 2 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/ 3 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 4 #include <../src/mat/impls/sbaij/seq/sbaij.h> 5 #include <petscblaslapack.h> 6 7 #if defined(PETSC_HAVE_ELEMENTAL) 8 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 9 #endif 10 11 /* This could be moved to matimpl.h */ 12 static PetscErrorCode MatPreallocateWithMats_Private(Mat B, PetscInt nm, Mat X[], PetscBool symm[], PetscBool fill) 13 { 14 Mat preallocator; 15 PetscInt r,rstart,rend; 16 PetscInt bs,i,m,n,M,N; 17 PetscBool cong = PETSC_TRUE; 18 PetscErrorCode ierr; 19 20 PetscFunctionBegin; 21 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 22 PetscValidLogicalCollectiveInt(B,nm,2); 23 for (i = 0; i < nm; i++) { 24 PetscValidHeaderSpecific(X[i],MAT_CLASSID,3); 25 ierr = PetscLayoutCompare(B->rmap,X[i]->rmap,&cong);CHKERRQ(ierr); 26 if (!cong) SETERRQ(PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"Not for different layouts"); 27 } 28 PetscValidLogicalCollectiveBool(B,fill,5); 29 ierr = MatGetBlockSize(B,&bs);CHKERRQ(ierr); 30 ierr = MatGetSize(B,&M,&N);CHKERRQ(ierr); 31 ierr = MatGetLocalSize(B,&m,&n);CHKERRQ(ierr); 32 ierr = MatCreate(PetscObjectComm((PetscObject)B),&preallocator);CHKERRQ(ierr); 33 ierr = MatSetType(preallocator,MATPREALLOCATOR);CHKERRQ(ierr); 34 ierr = MatSetBlockSize(preallocator,bs);CHKERRQ(ierr); 35 ierr = MatSetSizes(preallocator,m,n,M,N);CHKERRQ(ierr); 36 ierr = MatSetUp(preallocator);CHKERRQ(ierr); 37 ierr = MatGetOwnershipRange(preallocator,&rstart,&rend);CHKERRQ(ierr); 38 for (r = rstart; r < rend; ++r) { 39 PetscInt ncols; 40 const PetscInt *row; 41 const PetscScalar *vals; 42 43 for (i = 0; i < nm; i++) { 44 ierr = MatGetRow(X[i],r,&ncols,&row,&vals);CHKERRQ(ierr); 45 ierr = MatSetValues(preallocator,1,&r,ncols,row,vals,INSERT_VALUES);CHKERRQ(ierr); 46 if (symm && symm[i]) { 47 ierr = MatSetValues(preallocator,ncols,row,1,&r,vals,INSERT_VALUES);CHKERRQ(ierr); 48 } 49 ierr = MatRestoreRow(X[i],r,&ncols,&row,&vals);CHKERRQ(ierr); 50 } 51 } 52 ierr = MatAssemblyBegin(preallocator,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 53 ierr = MatAssemblyEnd(preallocator,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 54 ierr = MatPreallocatorPreallocate(preallocator,fill,B);CHKERRQ(ierr); 55 ierr = MatDestroy(&preallocator);CHKERRQ(ierr); 56 PetscFunctionReturn(0); 57 } 58 59 static PetscErrorCode MatConvert_MPISBAIJ_XAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 60 { 61 Mat B; 62 PetscErrorCode ierr; 63 PetscInt r; 64 65 PetscFunctionBegin; 66 if (reuse != MAT_REUSE_MATRIX) { 67 PetscBool symm = PETSC_TRUE; 68 PetscInt bs; 69 70 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 71 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 72 ierr = MatSetType(B,newtype);CHKERRQ(ierr); 73 ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr); 74 ierr = MatSetBlockSize(B,bs);CHKERRQ(ierr); 75 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 76 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 77 ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr); 78 ierr = MatPreallocateWithMats_Private(B,1,&A,&symm,PETSC_TRUE);CHKERRQ(ierr); 79 ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr); 80 } else B = *newmat; 81 82 ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr); 83 for (r = A->rmap->rstart; r < A->rmap->rend; r++) { 84 PetscInt ncols; 85 const PetscInt *row; 86 const PetscScalar *vals; 87 88 ierr = MatGetRow(A,r,&ncols,&row,&vals);CHKERRQ(ierr); 89 ierr = MatSetValues(B,1,&r,ncols,row,vals,INSERT_VALUES);CHKERRQ(ierr); 90 ierr = MatSetValues(B,ncols,row,1,&r,vals,INSERT_VALUES);CHKERRQ(ierr); 91 ierr = MatRestoreRow(A,r,&ncols,&row,&vals);CHKERRQ(ierr); 92 } 93 ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr); 94 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 95 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 96 97 if (reuse == MAT_INPLACE_MATRIX) { 98 ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr); 99 } else { 100 *newmat = B; 101 } 102 PetscFunctionReturn(0); 103 } 104 105 PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat) 106 { 107 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ*)mat->data; 108 PetscErrorCode ierr; 109 110 PetscFunctionBegin; 111 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 112 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 113 PetscFunctionReturn(0); 114 } 115 116 PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat) 117 { 118 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ*)mat->data; 119 PetscErrorCode ierr; 120 121 PetscFunctionBegin; 122 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 123 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 124 PetscFunctionReturn(0); 125 } 126 127 #define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,orow,ocol) \ 128 { \ 129 \ 130 brow = row/bs; \ 131 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 132 rmax = aimax[brow]; nrow = ailen[brow]; \ 133 bcol = col/bs; \ 134 ridx = row % bs; cidx = col % bs; \ 135 low = 0; high = nrow; \ 136 while (high-low > 3) { \ 137 t = (low+high)/2; \ 138 if (rp[t] > bcol) high = t; \ 139 else low = t; \ 140 } \ 141 for (_i=low; _i<high; _i++) { \ 142 if (rp[_i] > bcol) break; \ 143 if (rp[_i] == bcol) { \ 144 bap = ap + bs2*_i + bs*cidx + ridx; \ 145 if (addv == ADD_VALUES) *bap += value; \ 146 else *bap = value; \ 147 goto a_noinsert; \ 148 } \ 149 } \ 150 if (a->nonew == 1) goto a_noinsert; \ 151 if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 152 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 153 N = nrow++ - 1; \ 154 /* shift up all the later entries in this row */ \ 155 for (ii=N; ii>=_i; ii--) { \ 156 rp[ii+1] = rp[ii]; \ 157 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 158 } \ 159 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 160 rp[_i] = bcol; \ 161 ap[bs2*_i + bs*cidx + ridx] = value; \ 162 A->nonzerostate++;\ 163 a_noinsert:; \ 164 ailen[brow] = nrow; \ 165 } 166 167 #define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,orow,ocol) \ 168 { \ 169 brow = row/bs; \ 170 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 171 rmax = bimax[brow]; nrow = bilen[brow]; \ 172 bcol = col/bs; \ 173 ridx = row % bs; cidx = col % bs; \ 174 low = 0; high = nrow; \ 175 while (high-low > 3) { \ 176 t = (low+high)/2; \ 177 if (rp[t] > bcol) high = t; \ 178 else low = t; \ 179 } \ 180 for (_i=low; _i<high; _i++) { \ 181 if (rp[_i] > bcol) break; \ 182 if (rp[_i] == bcol) { \ 183 bap = ap + bs2*_i + bs*cidx + ridx; \ 184 if (addv == ADD_VALUES) *bap += value; \ 185 else *bap = value; \ 186 goto b_noinsert; \ 187 } \ 188 } \ 189 if (b->nonew == 1) goto b_noinsert; \ 190 if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 191 MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 192 N = nrow++ - 1; \ 193 /* shift up all the later entries in this row */ \ 194 for (ii=N; ii>=_i; ii--) { \ 195 rp[ii+1] = rp[ii]; \ 196 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 197 } \ 198 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 199 rp[_i] = bcol; \ 200 ap[bs2*_i + bs*cidx + ridx] = value; \ 201 B->nonzerostate++;\ 202 b_noinsert:; \ 203 bilen[brow] = nrow; \ 204 } 205 206 /* Only add/insert a(i,j) with i<=j (blocks). 207 Any a(i,j) with i>j input by user is ingored. 208 */ 209 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 210 { 211 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 212 MatScalar value; 213 PetscBool roworiented = baij->roworiented; 214 PetscErrorCode ierr; 215 PetscInt i,j,row,col; 216 PetscInt rstart_orig=mat->rmap->rstart; 217 PetscInt rend_orig =mat->rmap->rend,cstart_orig=mat->cmap->rstart; 218 PetscInt cend_orig =mat->cmap->rend,bs=mat->rmap->bs; 219 220 /* Some Variables required in the macro */ 221 Mat A = baij->A; 222 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data; 223 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 224 MatScalar *aa =a->a; 225 226 Mat B = baij->B; 227 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 228 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 229 MatScalar *ba =b->a; 230 231 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 232 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 233 MatScalar *ap,*bap; 234 235 /* for stash */ 236 PetscInt n_loc, *in_loc = NULL; 237 MatScalar *v_loc = NULL; 238 239 PetscFunctionBegin; 240 if (!baij->donotstash) { 241 if (n > baij->n_loc) { 242 ierr = PetscFree(baij->in_loc);CHKERRQ(ierr); 243 ierr = PetscFree(baij->v_loc);CHKERRQ(ierr); 244 ierr = PetscMalloc1(n,&baij->in_loc);CHKERRQ(ierr); 245 ierr = PetscMalloc1(n,&baij->v_loc);CHKERRQ(ierr); 246 247 baij->n_loc = n; 248 } 249 in_loc = baij->in_loc; 250 v_loc = baij->v_loc; 251 } 252 253 for (i=0; i<m; i++) { 254 if (im[i] < 0) continue; 255 #if defined(PETSC_USE_DEBUG) 256 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 257 #endif 258 if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */ 259 row = im[i] - rstart_orig; /* local row index */ 260 for (j=0; j<n; j++) { 261 if (im[i]/bs > in[j]/bs) { 262 if (a->ignore_ltriangular) { 263 continue; /* ignore lower triangular blocks */ 264 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 265 } 266 if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */ 267 col = in[j] - cstart_orig; /* local col index */ 268 brow = row/bs; bcol = col/bs; 269 if (brow > bcol) continue; /* ignore lower triangular blocks of A */ 270 if (roworiented) value = v[i*n+j]; 271 else value = v[i+j*m]; 272 MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,im[i],in[j]); 273 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 274 } else if (in[j] < 0) continue; 275 #if defined(PETSC_USE_DEBUG) 276 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 277 #endif 278 else { /* off-diag entry (B) */ 279 if (mat->was_assembled) { 280 if (!baij->colmap) { 281 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 282 } 283 #if defined(PETSC_USE_CTABLE) 284 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 285 col = col - 1; 286 #else 287 col = baij->colmap[in[j]/bs] - 1; 288 #endif 289 if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) { 290 ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 291 col = in[j]; 292 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 293 B = baij->B; 294 b = (Mat_SeqBAIJ*)(B)->data; 295 bimax= b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 296 ba = b->a; 297 } else col += in[j]%bs; 298 } else col = in[j]; 299 if (roworiented) value = v[i*n+j]; 300 else value = v[i+j*m]; 301 MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,im[i],in[j]); 302 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 303 } 304 } 305 } else { /* off processor entry */ 306 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 307 if (!baij->donotstash) { 308 mat->assembled = PETSC_FALSE; 309 n_loc = 0; 310 for (j=0; j<n; j++) { 311 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 312 in_loc[n_loc] = in[j]; 313 if (roworiented) { 314 v_loc[n_loc] = v[i*n+j]; 315 } else { 316 v_loc[n_loc] = v[j*m+i]; 317 } 318 n_loc++; 319 } 320 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr); 321 } 322 } 323 } 324 PetscFunctionReturn(0); 325 } 326 327 PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol) 328 { 329 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 330 PetscErrorCode ierr; 331 PetscInt *rp,low,high,t,ii,jj,nrow,i,rmax,N; 332 PetscInt *imax =a->imax,*ai=a->i,*ailen=a->ilen; 333 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs; 334 PetscBool roworiented=a->roworiented; 335 const PetscScalar *value = v; 336 MatScalar *ap,*aa = a->a,*bap; 337 338 PetscFunctionBegin; 339 if (col < row) { 340 if (a->ignore_ltriangular) PetscFunctionReturn(0); /* ignore lower triangular block */ 341 else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 342 } 343 rp = aj + ai[row]; 344 ap = aa + bs2*ai[row]; 345 rmax = imax[row]; 346 nrow = ailen[row]; 347 value = v; 348 low = 0; 349 high = nrow; 350 351 while (high-low > 7) { 352 t = (low+high)/2; 353 if (rp[t] > col) high = t; 354 else low = t; 355 } 356 for (i=low; i<high; i++) { 357 if (rp[i] > col) break; 358 if (rp[i] == col) { 359 bap = ap + bs2*i; 360 if (roworiented) { 361 if (is == ADD_VALUES) { 362 for (ii=0; ii<bs; ii++) { 363 for (jj=ii; jj<bs2; jj+=bs) { 364 bap[jj] += *value++; 365 } 366 } 367 } else { 368 for (ii=0; ii<bs; ii++) { 369 for (jj=ii; jj<bs2; jj+=bs) { 370 bap[jj] = *value++; 371 } 372 } 373 } 374 } else { 375 if (is == ADD_VALUES) { 376 for (ii=0; ii<bs; ii++) { 377 for (jj=0; jj<bs; jj++) { 378 *bap++ += *value++; 379 } 380 } 381 } else { 382 for (ii=0; ii<bs; ii++) { 383 for (jj=0; jj<bs; jj++) { 384 *bap++ = *value++; 385 } 386 } 387 } 388 } 389 goto noinsert2; 390 } 391 } 392 if (nonew == 1) goto noinsert2; 393 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new block index nonzero block (%D, %D) in the matrix", orow, ocol); 394 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 395 N = nrow++ - 1; high++; 396 /* shift up all the later entries in this row */ 397 for (ii=N; ii>=i; ii--) { 398 rp[ii+1] = rp[ii]; 399 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 400 } 401 if (N >= i) { 402 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 403 } 404 rp[i] = col; 405 bap = ap + bs2*i; 406 if (roworiented) { 407 for (ii=0; ii<bs; ii++) { 408 for (jj=ii; jj<bs2; jj+=bs) { 409 bap[jj] = *value++; 410 } 411 } 412 } else { 413 for (ii=0; ii<bs; ii++) { 414 for (jj=0; jj<bs; jj++) { 415 *bap++ = *value++; 416 } 417 } 418 } 419 noinsert2:; 420 ailen[row] = nrow; 421 PetscFunctionReturn(0); 422 } 423 424 /* 425 This routine is exactly duplicated in mpibaij.c 426 */ 427 PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol) 428 { 429 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 430 PetscInt *rp,low,high,t,ii,jj,nrow,i,rmax,N; 431 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 432 PetscErrorCode ierr; 433 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs; 434 PetscBool roworiented=a->roworiented; 435 const PetscScalar *value = v; 436 MatScalar *ap,*aa = a->a,*bap; 437 438 PetscFunctionBegin; 439 rp = aj + ai[row]; 440 ap = aa + bs2*ai[row]; 441 rmax = imax[row]; 442 nrow = ailen[row]; 443 low = 0; 444 high = nrow; 445 value = v; 446 while (high-low > 7) { 447 t = (low+high)/2; 448 if (rp[t] > col) high = t; 449 else low = t; 450 } 451 for (i=low; i<high; i++) { 452 if (rp[i] > col) break; 453 if (rp[i] == col) { 454 bap = ap + bs2*i; 455 if (roworiented) { 456 if (is == ADD_VALUES) { 457 for (ii=0; ii<bs; ii++) { 458 for (jj=ii; jj<bs2; jj+=bs) { 459 bap[jj] += *value++; 460 } 461 } 462 } else { 463 for (ii=0; ii<bs; ii++) { 464 for (jj=ii; jj<bs2; jj+=bs) { 465 bap[jj] = *value++; 466 } 467 } 468 } 469 } else { 470 if (is == ADD_VALUES) { 471 for (ii=0; ii<bs; ii++,value+=bs) { 472 for (jj=0; jj<bs; jj++) { 473 bap[jj] += value[jj]; 474 } 475 bap += bs; 476 } 477 } else { 478 for (ii=0; ii<bs; ii++,value+=bs) { 479 for (jj=0; jj<bs; jj++) { 480 bap[jj] = value[jj]; 481 } 482 bap += bs; 483 } 484 } 485 } 486 goto noinsert2; 487 } 488 } 489 if (nonew == 1) goto noinsert2; 490 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%D, %D) in the matrix", orow, ocol); 491 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 492 N = nrow++ - 1; high++; 493 /* shift up all the later entries in this row */ 494 for (ii=N; ii>=i; ii--) { 495 rp[ii+1] = rp[ii]; 496 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 497 } 498 if (N >= i) { 499 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 500 } 501 rp[i] = col; 502 bap = ap + bs2*i; 503 if (roworiented) { 504 for (ii=0; ii<bs; ii++) { 505 for (jj=ii; jj<bs2; jj+=bs) { 506 bap[jj] = *value++; 507 } 508 } 509 } else { 510 for (ii=0; ii<bs; ii++) { 511 for (jj=0; jj<bs; jj++) { 512 *bap++ = *value++; 513 } 514 } 515 } 516 noinsert2:; 517 ailen[row] = nrow; 518 PetscFunctionReturn(0); 519 } 520 521 /* 522 This routine could be optimized by removing the need for the block copy below and passing stride information 523 to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ() 524 */ 525 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 526 { 527 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 528 const MatScalar *value; 529 MatScalar *barray =baij->barray; 530 PetscBool roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular; 531 PetscErrorCode ierr; 532 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 533 PetscInt rend=baij->rendbs,cstart=baij->rstartbs,stepval; 534 PetscInt cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2; 535 536 PetscFunctionBegin; 537 if (!barray) { 538 ierr = PetscMalloc1(bs2,&barray);CHKERRQ(ierr); 539 baij->barray = barray; 540 } 541 542 if (roworiented) { 543 stepval = (n-1)*bs; 544 } else { 545 stepval = (m-1)*bs; 546 } 547 for (i=0; i<m; i++) { 548 if (im[i] < 0) continue; 549 #if defined(PETSC_USE_DEBUG) 550 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %D max %D",im[i],baij->Mbs-1); 551 #endif 552 if (im[i] >= rstart && im[i] < rend) { 553 row = im[i] - rstart; 554 for (j=0; j<n; j++) { 555 if (im[i] > in[j]) { 556 if (ignore_ltriangular) continue; /* ignore lower triangular blocks */ 557 else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 558 } 559 /* If NumCol = 1 then a copy is not required */ 560 if ((roworiented) && (n == 1)) { 561 barray = (MatScalar*) v + i*bs2; 562 } else if ((!roworiented) && (m == 1)) { 563 barray = (MatScalar*) v + j*bs2; 564 } else { /* Here a copy is required */ 565 if (roworiented) { 566 value = v + i*(stepval+bs)*bs + j*bs; 567 } else { 568 value = v + j*(stepval+bs)*bs + i*bs; 569 } 570 for (ii=0; ii<bs; ii++,value+=stepval) { 571 for (jj=0; jj<bs; jj++) { 572 *barray++ = *value++; 573 } 574 } 575 barray -=bs2; 576 } 577 578 if (in[j] >= cstart && in[j] < cend) { 579 col = in[j] - cstart; 580 ierr = MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr); 581 } else if (in[j] < 0) continue; 582 #if defined(PETSC_USE_DEBUG) 583 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %D max %D",in[j],baij->Nbs-1); 584 #endif 585 else { 586 if (mat->was_assembled) { 587 if (!baij->colmap) { 588 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 589 } 590 591 #if defined(PETSC_USE_DEBUG) 592 #if defined(PETSC_USE_CTABLE) 593 { PetscInt data; 594 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 595 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 596 } 597 #else 598 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 599 #endif 600 #endif 601 #if defined(PETSC_USE_CTABLE) 602 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 603 col = (col - 1)/bs; 604 #else 605 col = (baij->colmap[in[j]] - 1)/bs; 606 #endif 607 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 608 ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 609 col = in[j]; 610 } 611 } else col = in[j]; 612 ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr); 613 } 614 } 615 } else { 616 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 617 if (!baij->donotstash) { 618 if (roworiented) { 619 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 620 } else { 621 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 622 } 623 } 624 } 625 } 626 PetscFunctionReturn(0); 627 } 628 629 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 630 { 631 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 632 PetscErrorCode ierr; 633 PetscInt bs = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend; 634 PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data; 635 636 PetscFunctionBegin; 637 for (i=0; i<m; i++) { 638 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */ 639 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 640 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 641 row = idxm[i] - bsrstart; 642 for (j=0; j<n; j++) { 643 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */ 644 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 645 if (idxn[j] >= bscstart && idxn[j] < bscend) { 646 col = idxn[j] - bscstart; 647 ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 648 } else { 649 if (!baij->colmap) { 650 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 651 } 652 #if defined(PETSC_USE_CTABLE) 653 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 654 data--; 655 #else 656 data = baij->colmap[idxn[j]/bs]-1; 657 #endif 658 if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 659 else { 660 col = data + idxn[j]%bs; 661 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 662 } 663 } 664 } 665 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 666 } 667 PetscFunctionReturn(0); 668 } 669 670 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm) 671 { 672 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 673 PetscErrorCode ierr; 674 PetscReal sum[2],*lnorm2; 675 676 PetscFunctionBegin; 677 if (baij->size == 1) { 678 ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr); 679 } else { 680 if (type == NORM_FROBENIUS) { 681 ierr = PetscMalloc1(2,&lnorm2);CHKERRQ(ierr); 682 ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr); 683 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++; /* squar power of norm(A) */ 684 ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr); 685 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--; /* squar power of norm(B) */ 686 ierr = MPIU_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 687 *norm = PetscSqrtReal(sum[0] + 2*sum[1]); 688 ierr = PetscFree(lnorm2);CHKERRQ(ierr); 689 } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */ 690 Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data; 691 Mat_SeqBAIJ *bmat=(Mat_SeqBAIJ*)baij->B->data; 692 PetscReal *rsum,*rsum2,vabs; 693 PetscInt *jj,*garray=baij->garray,rstart=baij->rstartbs,nz; 694 PetscInt brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs; 695 MatScalar *v; 696 697 ierr = PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2);CHKERRQ(ierr); 698 ierr = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 699 /* Amat */ 700 v = amat->a; jj = amat->j; 701 for (brow=0; brow<mbs; brow++) { 702 grow = bs*(rstart + brow); 703 nz = amat->i[brow+1] - amat->i[brow]; 704 for (bcol=0; bcol<nz; bcol++) { 705 gcol = bs*(rstart + *jj); jj++; 706 for (col=0; col<bs; col++) { 707 for (row=0; row<bs; row++) { 708 vabs = PetscAbsScalar(*v); v++; 709 rsum[gcol+col] += vabs; 710 /* non-diagonal block */ 711 if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs; 712 } 713 } 714 } 715 ierr = PetscLogFlops(nz*bs*bs);CHKERRQ(ierr); 716 } 717 /* Bmat */ 718 v = bmat->a; jj = bmat->j; 719 for (brow=0; brow<mbs; brow++) { 720 grow = bs*(rstart + brow); 721 nz = bmat->i[brow+1] - bmat->i[brow]; 722 for (bcol=0; bcol<nz; bcol++) { 723 gcol = bs*garray[*jj]; jj++; 724 for (col=0; col<bs; col++) { 725 for (row=0; row<bs; row++) { 726 vabs = PetscAbsScalar(*v); v++; 727 rsum[gcol+col] += vabs; 728 rsum[grow+row] += vabs; 729 } 730 } 731 } 732 ierr = PetscLogFlops(nz*bs*bs);CHKERRQ(ierr); 733 } 734 ierr = MPIU_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 735 *norm = 0.0; 736 for (col=0; col<mat->cmap->N; col++) { 737 if (rsum2[col] > *norm) *norm = rsum2[col]; 738 } 739 ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr); 740 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet"); 741 } 742 PetscFunctionReturn(0); 743 } 744 745 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode) 746 { 747 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 748 PetscErrorCode ierr; 749 PetscInt nstash,reallocs; 750 751 PetscFunctionBegin; 752 if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 753 754 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 755 ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr); 756 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 757 ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 758 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 759 ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 760 PetscFunctionReturn(0); 761 } 762 763 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode) 764 { 765 Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data; 766 Mat_SeqSBAIJ *a =(Mat_SeqSBAIJ*)baij->A->data; 767 PetscErrorCode ierr; 768 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 769 PetscInt *row,*col; 770 PetscBool other_disassembled; 771 PetscMPIInt n; 772 PetscBool r1,r2,r3; 773 MatScalar *val; 774 775 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 776 PetscFunctionBegin; 777 if (!baij->donotstash && !mat->nooffprocentries) { 778 while (1) { 779 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 780 if (!flg) break; 781 782 for (i=0; i<n;) { 783 /* Now identify the consecutive vals belonging to the same row */ 784 for (j=i,rstart=row[j]; j<n; j++) { 785 if (row[j] != rstart) break; 786 } 787 if (j < n) ncols = j-i; 788 else ncols = n-i; 789 /* Now assemble all these values with a single function call */ 790 ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 791 i = j; 792 } 793 } 794 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 795 /* Now process the block-stash. Since the values are stashed column-oriented, 796 set the roworiented flag to column oriented, and after MatSetValues() 797 restore the original flags */ 798 r1 = baij->roworiented; 799 r2 = a->roworiented; 800 r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented; 801 802 baij->roworiented = PETSC_FALSE; 803 a->roworiented = PETSC_FALSE; 804 805 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworinted */ 806 while (1) { 807 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 808 if (!flg) break; 809 810 for (i=0; i<n;) { 811 /* Now identify the consecutive vals belonging to the same row */ 812 for (j=i,rstart=row[j]; j<n; j++) { 813 if (row[j] != rstart) break; 814 } 815 if (j < n) ncols = j-i; 816 else ncols = n-i; 817 ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode);CHKERRQ(ierr); 818 i = j; 819 } 820 } 821 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 822 823 baij->roworiented = r1; 824 a->roworiented = r2; 825 826 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworinted */ 827 } 828 829 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 830 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 831 832 /* determine if any processor has disassembled, if so we must 833 also disassemble ourselfs, in order that we may reassemble. */ 834 /* 835 if nonzero structure of submatrix B cannot change then we know that 836 no processor disassembled thus we can skip this stuff 837 */ 838 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 839 ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 840 if (mat->was_assembled && !other_disassembled) { 841 ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 842 } 843 } 844 845 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 846 ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */ 847 } 848 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 849 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 850 851 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 852 853 baij->rowvalues = 0; 854 855 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 856 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 857 PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate; 858 ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 859 } 860 PetscFunctionReturn(0); 861 } 862 863 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 864 #include <petscdraw.h> 865 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 866 { 867 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 868 PetscErrorCode ierr; 869 PetscInt bs = mat->rmap->bs; 870 PetscMPIInt rank = baij->rank; 871 PetscBool iascii,isdraw; 872 PetscViewer sviewer; 873 PetscViewerFormat format; 874 875 PetscFunctionBegin; 876 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 877 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 878 if (iascii) { 879 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 880 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 881 MatInfo info; 882 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 883 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 884 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 885 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %g\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(double)info.memory);CHKERRQ(ierr); 886 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 887 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 888 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 889 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 890 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 891 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 892 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 893 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 894 PetscFunctionReturn(0); 895 } else if (format == PETSC_VIEWER_ASCII_INFO) { 896 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 897 PetscFunctionReturn(0); 898 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 899 PetscFunctionReturn(0); 900 } 901 } 902 903 if (isdraw) { 904 PetscDraw draw; 905 PetscBool isnull; 906 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 907 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 908 if (isnull) PetscFunctionReturn(0); 909 } 910 911 { 912 /* assemble the entire matrix onto first processor. */ 913 Mat A; 914 Mat_SeqSBAIJ *Aloc; 915 Mat_SeqBAIJ *Bloc; 916 PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 917 MatScalar *a; 918 const char *matname; 919 920 /* Should this be the same type as mat? */ 921 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 922 if (!rank) { 923 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 924 } else { 925 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 926 } 927 ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 928 ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr); 929 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 930 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 931 932 /* copy over the A part */ 933 Aloc = (Mat_SeqSBAIJ*)baij->A->data; 934 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 935 ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr); 936 937 for (i=0; i<mbs; i++) { 938 rvals[0] = bs*(baij->rstartbs + i); 939 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 940 for (j=ai[i]; j<ai[i+1]; j++) { 941 col = (baij->cstartbs+aj[j])*bs; 942 for (k=0; k<bs; k++) { 943 ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 944 col++; 945 a += bs; 946 } 947 } 948 } 949 /* copy over the B part */ 950 Bloc = (Mat_SeqBAIJ*)baij->B->data; 951 ai = Bloc->i; aj = Bloc->j; a = Bloc->a; 952 for (i=0; i<mbs; i++) { 953 954 rvals[0] = bs*(baij->rstartbs + i); 955 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 956 for (j=ai[i]; j<ai[i+1]; j++) { 957 col = baij->garray[aj[j]]*bs; 958 for (k=0; k<bs; k++) { 959 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 960 col++; 961 a += bs; 962 } 963 } 964 } 965 ierr = PetscFree(rvals);CHKERRQ(ierr); 966 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 967 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 968 /* 969 Everyone has to call to draw the matrix since the graphics waits are 970 synchronized across all processors that share the PetscDraw object 971 */ 972 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 973 ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr); 974 if (!rank) { 975 ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,matname);CHKERRQ(ierr); 976 ierr = MatView_SeqSBAIJ(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 977 } 978 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 979 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 980 ierr = MatDestroy(&A);CHKERRQ(ierr); 981 } 982 PetscFunctionReturn(0); 983 } 984 985 static PetscErrorCode MatView_MPISBAIJ_Binary(Mat mat,PetscViewer viewer) 986 { 987 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)mat->data; 988 Mat_SeqSBAIJ *A = (Mat_SeqSBAIJ*)a->A->data; 989 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)a->B->data; 990 PetscErrorCode ierr; 991 PetscInt i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen; 992 PetscInt *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll; 993 int fd; 994 PetscScalar *column_values; 995 FILE *file; 996 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 997 PetscInt message_count,flowcontrolcount; 998 999 PetscFunctionBegin; 1000 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1001 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 1002 nz = bs2*(A->nz + B->nz); 1003 rlen = mat->rmap->n; 1004 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1005 if (!rank) { 1006 header[0] = MAT_FILE_CLASSID; 1007 header[1] = mat->rmap->N; 1008 header[2] = mat->cmap->N; 1009 1010 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1011 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1012 /* get largest number of rows any processor has */ 1013 range = mat->rmap->range; 1014 for (i=1; i<size; i++) { 1015 rlen = PetscMax(rlen,range[i+1] - range[i]); 1016 } 1017 } else { 1018 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1019 } 1020 1021 ierr = PetscMalloc1(rlen/bs,&crow_lens);CHKERRQ(ierr); 1022 /* compute lengths of each row */ 1023 for (i=0; i<a->mbs; i++) { 1024 crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1025 } 1026 /* store the row lengths to the file */ 1027 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1028 if (!rank) { 1029 MPI_Status status; 1030 ierr = PetscMalloc1(rlen,&row_lens);CHKERRQ(ierr); 1031 rlen = (range[1] - range[0])/bs; 1032 for (i=0; i<rlen; i++) { 1033 for (j=0; j<bs; j++) { 1034 row_lens[i*bs+j] = bs*crow_lens[i]; 1035 } 1036 } 1037 ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1038 for (i=1; i<size; i++) { 1039 rlen = (range[i+1] - range[i])/bs; 1040 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1041 ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1042 for (k=0; k<rlen; k++) { 1043 for (j=0; j<bs; j++) { 1044 row_lens[k*bs+j] = bs*crow_lens[k]; 1045 } 1046 } 1047 ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1048 } 1049 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1050 ierr = PetscFree(row_lens);CHKERRQ(ierr); 1051 } else { 1052 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1053 ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1054 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1055 } 1056 ierr = PetscFree(crow_lens);CHKERRQ(ierr); 1057 1058 /* load up the local column indices. Include for all rows not just one for each block row since process 0 does not have the 1059 information needed to make it for each row from a block row. This does require more communication but still not more than 1060 the communication needed for the nonzero values */ 1061 nzmax = nz; /* space a largest processor needs */ 1062 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1063 ierr = PetscMalloc1(nzmax,&column_indices);CHKERRQ(ierr); 1064 cnt = 0; 1065 for (i=0; i<a->mbs; i++) { 1066 pcnt = cnt; 1067 for (j=B->i[i]; j<B->i[i+1]; j++) { 1068 if ((col = garray[B->j[j]]) > cstart) break; 1069 for (l=0; l<bs; l++) { 1070 column_indices[cnt++] = bs*col+l; 1071 } 1072 } 1073 for (k=A->i[i]; k<A->i[i+1]; k++) { 1074 for (l=0; l<bs; l++) { 1075 column_indices[cnt++] = bs*(A->j[k] + cstart)+l; 1076 } 1077 } 1078 for (; j<B->i[i+1]; j++) { 1079 for (l=0; l<bs; l++) { 1080 column_indices[cnt++] = bs*garray[B->j[j]]+l; 1081 } 1082 } 1083 len = cnt - pcnt; 1084 for (k=1; k<bs; k++) { 1085 ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr); 1086 cnt += len; 1087 } 1088 } 1089 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1090 1091 /* store the columns to the file */ 1092 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1093 if (!rank) { 1094 MPI_Status status; 1095 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1096 for (i=1; i<size; i++) { 1097 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1098 ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1099 ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1100 ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1101 } 1102 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1103 } else { 1104 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1105 ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1106 ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1107 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1108 } 1109 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1110 1111 /* load up the numerical values */ 1112 ierr = PetscMalloc1(nzmax,&column_values);CHKERRQ(ierr); 1113 cnt = 0; 1114 for (i=0; i<a->mbs; i++) { 1115 rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]); 1116 for (j=B->i[i]; j<B->i[i+1]; j++) { 1117 if (garray[B->j[j]] > cstart) break; 1118 for (l=0; l<bs; l++) { 1119 for (ll=0; ll<bs; ll++) { 1120 column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll]; 1121 } 1122 } 1123 cnt += bs; 1124 } 1125 for (k=A->i[i]; k<A->i[i+1]; k++) { 1126 for (l=0; l<bs; l++) { 1127 for (ll=0; ll<bs; ll++) { 1128 column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll]; 1129 } 1130 } 1131 cnt += bs; 1132 } 1133 for (; j<B->i[i+1]; j++) { 1134 for (l=0; l<bs; l++) { 1135 for (ll=0; ll<bs; ll++) { 1136 column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll]; 1137 } 1138 } 1139 cnt += bs; 1140 } 1141 cnt += (bs-1)*rlen; 1142 } 1143 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1144 1145 /* store the column values to the file */ 1146 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1147 if (!rank) { 1148 MPI_Status status; 1149 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1150 for (i=1; i<size; i++) { 1151 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1152 ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1153 ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1154 ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1155 } 1156 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1157 } else { 1158 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1159 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1160 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1161 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1162 } 1163 ierr = PetscFree(column_values);CHKERRQ(ierr); 1164 1165 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1166 if (file) { 1167 fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs); 1168 } 1169 PetscFunctionReturn(0); 1170 } 1171 1172 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer) 1173 { 1174 PetscErrorCode ierr; 1175 PetscBool iascii,isdraw,issocket,isbinary; 1176 1177 PetscFunctionBegin; 1178 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1179 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1180 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1181 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1182 if (iascii || isdraw || issocket) { 1183 ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1184 } else if (isbinary) { 1185 ierr = MatView_MPISBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1186 } 1187 PetscFunctionReturn(0); 1188 } 1189 1190 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat) 1191 { 1192 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1193 PetscErrorCode ierr; 1194 1195 PetscFunctionBegin; 1196 #if defined(PETSC_USE_LOG) 1197 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N); 1198 #endif 1199 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1200 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 1201 ierr = MatDestroy(&baij->A);CHKERRQ(ierr); 1202 ierr = MatDestroy(&baij->B);CHKERRQ(ierr); 1203 #if defined(PETSC_USE_CTABLE) 1204 ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr); 1205 #else 1206 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 1207 #endif 1208 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 1209 ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr); 1210 ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr); 1211 ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr); 1212 ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr); 1213 ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr); 1214 ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr); 1215 ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr); 1216 ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr); 1217 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 1218 ierr = PetscFree(baij->barray);CHKERRQ(ierr); 1219 ierr = PetscFree(baij->hd);CHKERRQ(ierr); 1220 ierr = VecDestroy(&baij->diag);CHKERRQ(ierr); 1221 ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr); 1222 ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr); 1223 #if defined(PETSC_USE_REAL_MAT_SINGLE) 1224 ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr); 1225 #endif 1226 ierr = PetscFree(baij->in_loc);CHKERRQ(ierr); 1227 ierr = PetscFree(baij->v_loc);CHKERRQ(ierr); 1228 ierr = PetscFree(baij->rangebs);CHKERRQ(ierr); 1229 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1230 1231 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1232 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1233 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1234 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1235 #if defined(PETSC_HAVE_ELEMENTAL) 1236 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_elemental_C",NULL);CHKERRQ(ierr); 1237 #endif 1238 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpiaij_C",NULL);CHKERRQ(ierr); 1239 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpibaij_C",NULL);CHKERRQ(ierr); 1240 PetscFunctionReturn(0); 1241 } 1242 1243 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy) 1244 { 1245 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1246 PetscErrorCode ierr; 1247 PetscInt nt,mbs=a->mbs,bs=A->rmap->bs; 1248 PetscScalar *from; 1249 const PetscScalar *x; 1250 1251 PetscFunctionBegin; 1252 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1253 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1254 1255 /* diagonal part */ 1256 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 1257 ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr); 1258 1259 /* subdiagonal part */ 1260 ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 1261 1262 /* copy x into the vec slvec0 */ 1263 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 1264 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1265 1266 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 1267 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 1268 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1269 1270 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1271 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1272 /* supperdiagonal part */ 1273 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 1274 PetscFunctionReturn(0); 1275 } 1276 1277 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy) 1278 { 1279 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1280 PetscErrorCode ierr; 1281 PetscInt nt,mbs=a->mbs,bs=A->rmap->bs; 1282 PetscScalar *from; 1283 const PetscScalar *x; 1284 1285 PetscFunctionBegin; 1286 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1287 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1288 1289 /* diagonal part */ 1290 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 1291 ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr); 1292 1293 /* subdiagonal part */ 1294 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 1295 1296 /* copy x into the vec slvec0 */ 1297 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 1298 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1299 1300 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 1301 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 1302 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1303 1304 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1305 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1306 /* supperdiagonal part */ 1307 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 1308 PetscFunctionReturn(0); 1309 } 1310 1311 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy) 1312 { 1313 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1314 PetscErrorCode ierr; 1315 PetscInt nt; 1316 1317 PetscFunctionBegin; 1318 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1319 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1320 1321 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1322 if (nt != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 1323 1324 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1325 /* do diagonal part */ 1326 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1327 /* do supperdiagonal part */ 1328 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1329 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1330 /* do subdiagonal part */ 1331 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1332 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1333 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1334 PetscFunctionReturn(0); 1335 } 1336 1337 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1338 { 1339 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1340 PetscErrorCode ierr; 1341 PetscInt mbs=a->mbs,bs=A->rmap->bs; 1342 PetscScalar *from,zero=0.0; 1343 const PetscScalar *x; 1344 1345 PetscFunctionBegin; 1346 /* 1347 PetscSynchronizedPrintf(PetscObjectComm((PetscObject)A)," MatMultAdd is called ...\n"); 1348 PetscSynchronizedFlush(PetscObjectComm((PetscObject)A),PETSC_STDOUT); 1349 */ 1350 /* diagonal part */ 1351 ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr); 1352 ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr); 1353 1354 /* subdiagonal part */ 1355 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 1356 1357 /* copy x into the vec slvec0 */ 1358 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 1359 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1360 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 1361 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 1362 1363 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1364 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1365 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1366 1367 /* supperdiagonal part */ 1368 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr); 1369 PetscFunctionReturn(0); 1370 } 1371 1372 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz) 1373 { 1374 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1375 PetscErrorCode ierr; 1376 1377 PetscFunctionBegin; 1378 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1379 /* do diagonal part */ 1380 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1381 /* do supperdiagonal part */ 1382 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1383 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1384 1385 /* do subdiagonal part */ 1386 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1387 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1388 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1389 PetscFunctionReturn(0); 1390 } 1391 1392 /* 1393 This only works correctly for square matrices where the subblock A->A is the 1394 diagonal block 1395 */ 1396 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v) 1397 { 1398 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1399 PetscErrorCode ierr; 1400 1401 PetscFunctionBegin; 1402 /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */ 1403 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1404 PetscFunctionReturn(0); 1405 } 1406 1407 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa) 1408 { 1409 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1410 PetscErrorCode ierr; 1411 1412 PetscFunctionBegin; 1413 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1414 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1415 PetscFunctionReturn(0); 1416 } 1417 1418 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1419 { 1420 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 1421 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1422 PetscErrorCode ierr; 1423 PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1424 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend; 1425 PetscInt *cmap,*idx_p,cstart = mat->rstartbs; 1426 1427 PetscFunctionBegin; 1428 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1429 mat->getrowactive = PETSC_TRUE; 1430 1431 if (!mat->rowvalues && (idx || v)) { 1432 /* 1433 allocate enough space to hold information from the longest row. 1434 */ 1435 Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data; 1436 Mat_SeqBAIJ *Ba = (Mat_SeqBAIJ*)mat->B->data; 1437 PetscInt max = 1,mbs = mat->mbs,tmp; 1438 for (i=0; i<mbs; i++) { 1439 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */ 1440 if (max < tmp) max = tmp; 1441 } 1442 ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr); 1443 } 1444 1445 if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows"); 1446 lrow = row - brstart; /* local row index */ 1447 1448 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1449 if (!v) {pvA = 0; pvB = 0;} 1450 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1451 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1452 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1453 nztot = nzA + nzB; 1454 1455 cmap = mat->garray; 1456 if (v || idx) { 1457 if (nztot) { 1458 /* Sort by increasing column numbers, assuming A and B already sorted */ 1459 PetscInt imark = -1; 1460 if (v) { 1461 *v = v_p = mat->rowvalues; 1462 for (i=0; i<nzB; i++) { 1463 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1464 else break; 1465 } 1466 imark = i; 1467 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1468 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1469 } 1470 if (idx) { 1471 *idx = idx_p = mat->rowindices; 1472 if (imark > -1) { 1473 for (i=0; i<imark; i++) { 1474 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1475 } 1476 } else { 1477 for (i=0; i<nzB; i++) { 1478 if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1479 else break; 1480 } 1481 imark = i; 1482 } 1483 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1484 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1485 } 1486 } else { 1487 if (idx) *idx = 0; 1488 if (v) *v = 0; 1489 } 1490 } 1491 *nz = nztot; 1492 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1493 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1494 PetscFunctionReturn(0); 1495 } 1496 1497 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1498 { 1499 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1500 1501 PetscFunctionBegin; 1502 if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1503 baij->getrowactive = PETSC_FALSE; 1504 PetscFunctionReturn(0); 1505 } 1506 1507 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A) 1508 { 1509 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1510 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1511 1512 PetscFunctionBegin; 1513 aA->getrow_utriangular = PETSC_TRUE; 1514 PetscFunctionReturn(0); 1515 } 1516 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A) 1517 { 1518 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1519 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1520 1521 PetscFunctionBegin; 1522 aA->getrow_utriangular = PETSC_FALSE; 1523 PetscFunctionReturn(0); 1524 } 1525 1526 PetscErrorCode MatRealPart_MPISBAIJ(Mat A) 1527 { 1528 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1529 PetscErrorCode ierr; 1530 1531 PetscFunctionBegin; 1532 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1533 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1534 PetscFunctionReturn(0); 1535 } 1536 1537 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A) 1538 { 1539 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1540 PetscErrorCode ierr; 1541 1542 PetscFunctionBegin; 1543 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1544 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1545 PetscFunctionReturn(0); 1546 } 1547 1548 /* Check if isrow is a subset of iscol_local, called by MatCreateSubMatrix_MPISBAIJ() 1549 Input: isrow - distributed(parallel), 1550 iscol_local - locally owned (seq) 1551 */ 1552 PetscErrorCode ISEqual_private(IS isrow,IS iscol_local,PetscBool *flg) 1553 { 1554 PetscErrorCode ierr; 1555 PetscInt sz1,sz2,*a1,*a2,i,j,k,nmatch; 1556 const PetscInt *ptr1,*ptr2; 1557 1558 PetscFunctionBegin; 1559 ierr = ISGetLocalSize(isrow,&sz1);CHKERRQ(ierr); 1560 ierr = ISGetLocalSize(iscol_local,&sz2);CHKERRQ(ierr); 1561 if (sz1 > sz2) { 1562 *flg = PETSC_FALSE; 1563 PetscFunctionReturn(0); 1564 } 1565 1566 ierr = ISGetIndices(isrow,&ptr1);CHKERRQ(ierr); 1567 ierr = ISGetIndices(iscol_local,&ptr2);CHKERRQ(ierr); 1568 1569 ierr = PetscMalloc1(sz1,&a1);CHKERRQ(ierr); 1570 ierr = PetscMalloc1(sz2,&a2);CHKERRQ(ierr); 1571 ierr = PetscMemcpy(a1,ptr1,sz1*sizeof(PetscInt));CHKERRQ(ierr); 1572 ierr = PetscMemcpy(a2,ptr2,sz2*sizeof(PetscInt));CHKERRQ(ierr); 1573 ierr = PetscSortInt(sz1,a1);CHKERRQ(ierr); 1574 ierr = PetscSortInt(sz2,a2);CHKERRQ(ierr); 1575 1576 nmatch=0; 1577 k = 0; 1578 for (i=0; i<sz1; i++){ 1579 for (j=k; j<sz2; j++){ 1580 if (a1[i] == a2[j]) { 1581 k = j; nmatch++; 1582 break; 1583 } 1584 } 1585 } 1586 ierr = ISRestoreIndices(isrow,&ptr1);CHKERRQ(ierr); 1587 ierr = ISRestoreIndices(iscol_local,&ptr2);CHKERRQ(ierr); 1588 ierr = PetscFree(a1);CHKERRQ(ierr); 1589 ierr = PetscFree(a2);CHKERRQ(ierr); 1590 if (nmatch < sz1) { 1591 *flg = PETSC_FALSE; 1592 } else { 1593 *flg = PETSC_TRUE; 1594 } 1595 PetscFunctionReturn(0); 1596 } 1597 1598 PetscErrorCode MatCreateSubMatrix_MPISBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 1599 { 1600 PetscErrorCode ierr; 1601 IS iscol_local; 1602 PetscInt csize; 1603 PetscBool isequal; 1604 1605 PetscFunctionBegin; 1606 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1607 if (call == MAT_REUSE_MATRIX) { 1608 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 1609 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 1610 } else { 1611 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 1612 ierr = ISEqual_private(isrow,iscol_local,&isequal);CHKERRQ(ierr); 1613 if (!isequal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isrow"); 1614 } 1615 1616 /* now call MatCreateSubMatrix_MPIBAIJ() */ 1617 ierr = MatCreateSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 1618 if (call == MAT_INITIAL_MATRIX) { 1619 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 1620 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 1621 } 1622 PetscFunctionReturn(0); 1623 } 1624 1625 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A) 1626 { 1627 Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data; 1628 PetscErrorCode ierr; 1629 1630 PetscFunctionBegin; 1631 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1632 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1633 PetscFunctionReturn(0); 1634 } 1635 1636 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1637 { 1638 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data; 1639 Mat A = a->A,B = a->B; 1640 PetscErrorCode ierr; 1641 PetscReal isend[5],irecv[5]; 1642 1643 PetscFunctionBegin; 1644 info->block_size = (PetscReal)matin->rmap->bs; 1645 1646 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1647 1648 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1649 isend[3] = info->memory; isend[4] = info->mallocs; 1650 1651 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1652 1653 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1654 isend[3] += info->memory; isend[4] += info->mallocs; 1655 if (flag == MAT_LOCAL) { 1656 info->nz_used = isend[0]; 1657 info->nz_allocated = isend[1]; 1658 info->nz_unneeded = isend[2]; 1659 info->memory = isend[3]; 1660 info->mallocs = isend[4]; 1661 } else if (flag == MAT_GLOBAL_MAX) { 1662 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1663 1664 info->nz_used = irecv[0]; 1665 info->nz_allocated = irecv[1]; 1666 info->nz_unneeded = irecv[2]; 1667 info->memory = irecv[3]; 1668 info->mallocs = irecv[4]; 1669 } else if (flag == MAT_GLOBAL_SUM) { 1670 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1671 1672 info->nz_used = irecv[0]; 1673 info->nz_allocated = irecv[1]; 1674 info->nz_unneeded = irecv[2]; 1675 info->memory = irecv[3]; 1676 info->mallocs = irecv[4]; 1677 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1678 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1679 info->fill_ratio_needed = 0; 1680 info->factor_mallocs = 0; 1681 PetscFunctionReturn(0); 1682 } 1683 1684 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg) 1685 { 1686 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1687 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1688 PetscErrorCode ierr; 1689 1690 PetscFunctionBegin; 1691 switch (op) { 1692 case MAT_NEW_NONZERO_LOCATIONS: 1693 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1694 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1695 case MAT_KEEP_NONZERO_PATTERN: 1696 case MAT_SUBMAT_SINGLEIS: 1697 case MAT_NEW_NONZERO_LOCATION_ERR: 1698 MatCheckPreallocated(A,1); 1699 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1700 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1701 break; 1702 case MAT_ROW_ORIENTED: 1703 MatCheckPreallocated(A,1); 1704 a->roworiented = flg; 1705 1706 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1707 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1708 break; 1709 case MAT_NEW_DIAGONALS: 1710 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1711 break; 1712 case MAT_IGNORE_OFF_PROC_ENTRIES: 1713 a->donotstash = flg; 1714 break; 1715 case MAT_USE_HASH_TABLE: 1716 a->ht_flag = flg; 1717 break; 1718 case MAT_HERMITIAN: 1719 MatCheckPreallocated(A,1); 1720 if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first"); 1721 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1722 #if defined(PETSC_USE_COMPLEX) 1723 A->ops->mult = MatMult_MPISBAIJ_Hermitian; 1724 #endif 1725 break; 1726 case MAT_SPD: 1727 A->spd_set = PETSC_TRUE; 1728 A->spd = flg; 1729 if (flg) { 1730 A->symmetric = PETSC_TRUE; 1731 A->structurally_symmetric = PETSC_TRUE; 1732 A->symmetric_set = PETSC_TRUE; 1733 A->structurally_symmetric_set = PETSC_TRUE; 1734 } 1735 break; 1736 case MAT_SYMMETRIC: 1737 MatCheckPreallocated(A,1); 1738 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1739 break; 1740 case MAT_STRUCTURALLY_SYMMETRIC: 1741 MatCheckPreallocated(A,1); 1742 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1743 break; 1744 case MAT_SYMMETRY_ETERNAL: 1745 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric"); 1746 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1747 break; 1748 case MAT_IGNORE_LOWER_TRIANGULAR: 1749 aA->ignore_ltriangular = flg; 1750 break; 1751 case MAT_ERROR_LOWER_TRIANGULAR: 1752 aA->ignore_ltriangular = flg; 1753 break; 1754 case MAT_GETROW_UPPERTRIANGULAR: 1755 aA->getrow_utriangular = flg; 1756 break; 1757 default: 1758 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1759 } 1760 PetscFunctionReturn(0); 1761 } 1762 1763 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B) 1764 { 1765 PetscErrorCode ierr; 1766 1767 PetscFunctionBegin; 1768 if (reuse == MAT_INITIAL_MATRIX) { 1769 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 1770 } else if (reuse == MAT_REUSE_MATRIX) { 1771 ierr = MatCopy(A,*B,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1772 } 1773 PetscFunctionReturn(0); 1774 } 1775 1776 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr) 1777 { 1778 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1779 Mat a = baij->A, b=baij->B; 1780 PetscErrorCode ierr; 1781 PetscInt nv,m,n; 1782 PetscBool flg; 1783 1784 PetscFunctionBegin; 1785 if (ll != rr) { 1786 ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr); 1787 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n"); 1788 } 1789 if (!ll) PetscFunctionReturn(0); 1790 1791 ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr); 1792 if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n); 1793 1794 ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr); 1795 if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size"); 1796 1797 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1798 1799 /* left diagonalscale the off-diagonal part */ 1800 ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr); 1801 1802 /* scale the diagonal part */ 1803 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1804 1805 /* right diagonalscale the off-diagonal part */ 1806 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1807 ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr); 1808 PetscFunctionReturn(0); 1809 } 1810 1811 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A) 1812 { 1813 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1814 PetscErrorCode ierr; 1815 1816 PetscFunctionBegin; 1817 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1818 PetscFunctionReturn(0); 1819 } 1820 1821 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*); 1822 1823 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool *flag) 1824 { 1825 Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data; 1826 Mat a,b,c,d; 1827 PetscBool flg; 1828 PetscErrorCode ierr; 1829 1830 PetscFunctionBegin; 1831 a = matA->A; b = matA->B; 1832 c = matB->A; d = matB->B; 1833 1834 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1835 if (flg) { 1836 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1837 } 1838 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1839 PetscFunctionReturn(0); 1840 } 1841 1842 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str) 1843 { 1844 PetscErrorCode ierr; 1845 PetscBool isbaij; 1846 1847 PetscFunctionBegin; 1848 ierr = PetscObjectTypeCompareAny((PetscObject)B,&isbaij,MATSEQSBAIJ,MATMPISBAIJ,"");CHKERRQ(ierr); 1849 if (!isbaij) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"Not for matrix type %s",((PetscObject)B)->type_name); 1850 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1851 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1852 ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr); 1853 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1854 ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr); 1855 } else { 1856 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1857 Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)B->data; 1858 1859 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1860 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1861 } 1862 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 1863 PetscFunctionReturn(0); 1864 } 1865 1866 PetscErrorCode MatSetUp_MPISBAIJ(Mat A) 1867 { 1868 PetscErrorCode ierr; 1869 1870 PetscFunctionBegin; 1871 ierr = MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1872 PetscFunctionReturn(0); 1873 } 1874 1875 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1876 { 1877 PetscErrorCode ierr; 1878 Mat_MPISBAIJ *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data; 1879 PetscBLASInt bnz,one=1; 1880 Mat_SeqSBAIJ *xa,*ya; 1881 Mat_SeqBAIJ *xb,*yb; 1882 1883 PetscFunctionBegin; 1884 if (str == SAME_NONZERO_PATTERN) { 1885 PetscScalar alpha = a; 1886 xa = (Mat_SeqSBAIJ*)xx->A->data; 1887 ya = (Mat_SeqSBAIJ*)yy->A->data; 1888 ierr = PetscBLASIntCast(xa->nz,&bnz);CHKERRQ(ierr); 1889 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one)); 1890 xb = (Mat_SeqBAIJ*)xx->B->data; 1891 yb = (Mat_SeqBAIJ*)yy->B->data; 1892 ierr = PetscBLASIntCast(xb->nz,&bnz);CHKERRQ(ierr); 1893 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one)); 1894 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 1895 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1896 ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 1897 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1898 ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr); 1899 } else { 1900 Mat B; 1901 PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs; 1902 if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size"); 1903 ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr); 1904 ierr = MatGetRowUpperTriangular(Y);CHKERRQ(ierr); 1905 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 1906 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 1907 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 1908 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 1909 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 1910 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 1911 ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr); 1912 ierr = MatAXPYGetPreallocation_SeqSBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 1913 ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 1914 ierr = MatMPISBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 1915 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 1916 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 1917 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 1918 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 1919 ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr); 1920 ierr = MatRestoreRowUpperTriangular(Y);CHKERRQ(ierr); 1921 } 1922 PetscFunctionReturn(0); 1923 } 1924 1925 PetscErrorCode MatCreateSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[]) 1926 { 1927 PetscErrorCode ierr; 1928 PetscInt i; 1929 PetscBool flg; 1930 1931 PetscFunctionBegin; 1932 ierr = MatCreateSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr); /* B[] are sbaij matrices */ 1933 for (i=0; i<n; i++) { 1934 ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr); 1935 if (!flg) { 1936 ierr = MatSeqSBAIJZeroOps_Private(*B[i]);CHKERRQ(ierr); 1937 } 1938 } 1939 PetscFunctionReturn(0); 1940 } 1941 1942 PetscErrorCode MatShift_MPISBAIJ(Mat Y,PetscScalar a) 1943 { 1944 PetscErrorCode ierr; 1945 Mat_MPISBAIJ *maij = (Mat_MPISBAIJ*)Y->data; 1946 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ*)maij->A->data; 1947 1948 PetscFunctionBegin; 1949 if (!Y->preallocated) { 1950 ierr = MatMPISBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);CHKERRQ(ierr); 1951 } else if (!aij->nz) { 1952 PetscInt nonew = aij->nonew; 1953 ierr = MatSeqSBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL);CHKERRQ(ierr); 1954 aij->nonew = nonew; 1955 } 1956 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 1957 PetscFunctionReturn(0); 1958 } 1959 1960 PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A,PetscBool *missing,PetscInt *d) 1961 { 1962 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1963 PetscErrorCode ierr; 1964 1965 PetscFunctionBegin; 1966 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 1967 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 1968 if (d) { 1969 PetscInt rstart; 1970 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 1971 *d += rstart/A->rmap->bs; 1972 1973 } 1974 PetscFunctionReturn(0); 1975 } 1976 1977 PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a) 1978 { 1979 PetscFunctionBegin; 1980 *a = ((Mat_MPISBAIJ*)A->data)->A; 1981 PetscFunctionReturn(0); 1982 } 1983 1984 /* -------------------------------------------------------------------*/ 1985 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ, 1986 MatGetRow_MPISBAIJ, 1987 MatRestoreRow_MPISBAIJ, 1988 MatMult_MPISBAIJ, 1989 /* 4*/ MatMultAdd_MPISBAIJ, 1990 MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */ 1991 MatMultAdd_MPISBAIJ, 1992 0, 1993 0, 1994 0, 1995 /* 10*/ 0, 1996 0, 1997 0, 1998 MatSOR_MPISBAIJ, 1999 MatTranspose_MPISBAIJ, 2000 /* 15*/ MatGetInfo_MPISBAIJ, 2001 MatEqual_MPISBAIJ, 2002 MatGetDiagonal_MPISBAIJ, 2003 MatDiagonalScale_MPISBAIJ, 2004 MatNorm_MPISBAIJ, 2005 /* 20*/ MatAssemblyBegin_MPISBAIJ, 2006 MatAssemblyEnd_MPISBAIJ, 2007 MatSetOption_MPISBAIJ, 2008 MatZeroEntries_MPISBAIJ, 2009 /* 24*/ 0, 2010 0, 2011 0, 2012 0, 2013 0, 2014 /* 29*/ MatSetUp_MPISBAIJ, 2015 0, 2016 0, 2017 MatGetDiagonalBlock_MPISBAIJ, 2018 0, 2019 /* 34*/ MatDuplicate_MPISBAIJ, 2020 0, 2021 0, 2022 0, 2023 0, 2024 /* 39*/ MatAXPY_MPISBAIJ, 2025 MatCreateSubMatrices_MPISBAIJ, 2026 MatIncreaseOverlap_MPISBAIJ, 2027 MatGetValues_MPISBAIJ, 2028 MatCopy_MPISBAIJ, 2029 /* 44*/ 0, 2030 MatScale_MPISBAIJ, 2031 MatShift_MPISBAIJ, 2032 0, 2033 0, 2034 /* 49*/ 0, 2035 0, 2036 0, 2037 0, 2038 0, 2039 /* 54*/ 0, 2040 0, 2041 MatSetUnfactored_MPISBAIJ, 2042 0, 2043 MatSetValuesBlocked_MPISBAIJ, 2044 /* 59*/ MatCreateSubMatrix_MPISBAIJ, 2045 0, 2046 0, 2047 0, 2048 0, 2049 /* 64*/ 0, 2050 0, 2051 0, 2052 0, 2053 0, 2054 /* 69*/ MatGetRowMaxAbs_MPISBAIJ, 2055 0, 2056 0, 2057 0, 2058 0, 2059 /* 74*/ 0, 2060 0, 2061 0, 2062 0, 2063 0, 2064 /* 79*/ 0, 2065 0, 2066 0, 2067 0, 2068 MatLoad_MPISBAIJ, 2069 /* 84*/ 0, 2070 0, 2071 0, 2072 0, 2073 0, 2074 /* 89*/ 0, 2075 0, 2076 0, 2077 0, 2078 0, 2079 /* 94*/ 0, 2080 0, 2081 0, 2082 0, 2083 0, 2084 /* 99*/ 0, 2085 0, 2086 0, 2087 0, 2088 0, 2089 /*104*/ 0, 2090 MatRealPart_MPISBAIJ, 2091 MatImaginaryPart_MPISBAIJ, 2092 MatGetRowUpperTriangular_MPISBAIJ, 2093 MatRestoreRowUpperTriangular_MPISBAIJ, 2094 /*109*/ 0, 2095 0, 2096 0, 2097 0, 2098 MatMissingDiagonal_MPISBAIJ, 2099 /*114*/ 0, 2100 0, 2101 0, 2102 0, 2103 0, 2104 /*119*/ 0, 2105 0, 2106 0, 2107 0, 2108 0, 2109 /*124*/ 0, 2110 0, 2111 0, 2112 0, 2113 0, 2114 /*129*/ 0, 2115 0, 2116 0, 2117 0, 2118 0, 2119 /*134*/ 0, 2120 0, 2121 0, 2122 0, 2123 0, 2124 /*139*/ MatSetBlockSizes_Default, 2125 0, 2126 0, 2127 0, 2128 0, 2129 /*144*/MatCreateMPIMatConcatenateSeqMat_MPISBAIJ 2130 }; 2131 2132 PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz) 2133 { 2134 Mat_MPISBAIJ *b; 2135 PetscErrorCode ierr; 2136 PetscInt i,mbs,Mbs; 2137 2138 PetscFunctionBegin; 2139 ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 2140 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2141 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2142 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2143 2144 b = (Mat_MPISBAIJ*)B->data; 2145 mbs = B->rmap->n/bs; 2146 Mbs = B->rmap->N/bs; 2147 if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->N,bs); 2148 2149 B->rmap->bs = bs; 2150 b->bs2 = bs*bs; 2151 b->mbs = mbs; 2152 b->Mbs = Mbs; 2153 b->nbs = B->cmap->n/bs; 2154 b->Nbs = B->cmap->N/bs; 2155 2156 for (i=0; i<=b->size; i++) { 2157 b->rangebs[i] = B->rmap->range[i]/bs; 2158 } 2159 b->rstartbs = B->rmap->rstart/bs; 2160 b->rendbs = B->rmap->rend/bs; 2161 2162 b->cstartbs = B->cmap->rstart/bs; 2163 b->cendbs = B->cmap->rend/bs; 2164 2165 #if defined(PETSC_USE_CTABLE) 2166 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2167 #else 2168 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2169 #endif 2170 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2171 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2172 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2173 ierr = VecDestroy(&b->slvec0);CHKERRQ(ierr); 2174 ierr = VecDestroy(&b->slvec0b);CHKERRQ(ierr); 2175 ierr = VecDestroy(&b->slvec1);CHKERRQ(ierr); 2176 ierr = VecDestroy(&b->slvec1a);CHKERRQ(ierr); 2177 ierr = VecDestroy(&b->slvec1b);CHKERRQ(ierr); 2178 ierr = VecScatterDestroy(&b->sMvctx);CHKERRQ(ierr); 2179 2180 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2181 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2182 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2183 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2184 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 2185 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2186 2187 if (!B->preallocated) { 2188 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2189 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2190 ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr); 2191 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2192 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr); 2193 } 2194 2195 ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2196 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 2197 2198 B->preallocated = PETSC_TRUE; 2199 B->was_assembled = PETSC_FALSE; 2200 B->assembled = PETSC_FALSE; 2201 PetscFunctionReturn(0); 2202 } 2203 2204 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2205 { 2206 PetscInt m,rstart,cstart,cend; 2207 PetscInt i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0; 2208 const PetscInt *JJ =0; 2209 PetscScalar *values=0; 2210 PetscErrorCode ierr; 2211 2212 PetscFunctionBegin; 2213 if (bs < 1) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 2214 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2215 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2216 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2217 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2218 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2219 m = B->rmap->n/bs; 2220 rstart = B->rmap->rstart/bs; 2221 cstart = B->cmap->rstart/bs; 2222 cend = B->cmap->rend/bs; 2223 2224 if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 2225 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 2226 for (i=0; i<m; i++) { 2227 nz = ii[i+1] - ii[i]; 2228 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 2229 nz_max = PetscMax(nz_max,nz); 2230 JJ = jj + ii[i]; 2231 for (j=0; j<nz; j++) { 2232 if (*JJ >= cstart) break; 2233 JJ++; 2234 } 2235 d = 0; 2236 for (; j<nz; j++) { 2237 if (*JJ++ >= cend) break; 2238 d++; 2239 } 2240 d_nnz[i] = d; 2241 o_nnz[i] = nz - d; 2242 } 2243 ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2244 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 2245 2246 values = (PetscScalar*)V; 2247 if (!values) { 2248 ierr = PetscMalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr); 2249 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2250 } 2251 for (i=0; i<m; i++) { 2252 PetscInt row = i + rstart; 2253 PetscInt ncols = ii[i+1] - ii[i]; 2254 const PetscInt *icols = jj + ii[i]; 2255 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2256 ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2257 } 2258 2259 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2260 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2261 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2262 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2263 PetscFunctionReturn(0); 2264 } 2265 2266 /*MC 2267 MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices, 2268 based on block compressed sparse row format. Only the upper triangular portion of the "diagonal" portion of 2269 the matrix is stored. 2270 2271 For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you 2272 can call MatSetOption(Mat, MAT_HERMITIAN); 2273 2274 Options Database Keys: 2275 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions() 2276 2277 Level: beginner 2278 2279 .seealso: MatCreateMPISBAIJ 2280 M*/ 2281 2282 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B) 2283 { 2284 Mat_MPISBAIJ *b; 2285 PetscErrorCode ierr; 2286 PetscBool flg = PETSC_FALSE; 2287 2288 PetscFunctionBegin; 2289 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 2290 B->data = (void*)b; 2291 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2292 2293 B->ops->destroy = MatDestroy_MPISBAIJ; 2294 B->ops->view = MatView_MPISBAIJ; 2295 B->assembled = PETSC_FALSE; 2296 B->insertmode = NOT_SET_VALUES; 2297 2298 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 2299 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr); 2300 2301 /* build local table of row and column ownerships */ 2302 ierr = PetscMalloc1(b->size+2,&b->rangebs);CHKERRQ(ierr); 2303 2304 /* build cache for off array entries formed */ 2305 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 2306 2307 b->donotstash = PETSC_FALSE; 2308 b->colmap = NULL; 2309 b->garray = NULL; 2310 b->roworiented = PETSC_TRUE; 2311 2312 /* stuff used in block assembly */ 2313 b->barray = 0; 2314 2315 /* stuff used for matrix vector multiply */ 2316 b->lvec = 0; 2317 b->Mvctx = 0; 2318 b->slvec0 = 0; 2319 b->slvec0b = 0; 2320 b->slvec1 = 0; 2321 b->slvec1a = 0; 2322 b->slvec1b = 0; 2323 b->sMvctx = 0; 2324 2325 /* stuff for MatGetRow() */ 2326 b->rowindices = 0; 2327 b->rowvalues = 0; 2328 b->getrowactive = PETSC_FALSE; 2329 2330 /* hash table stuff */ 2331 b->ht = 0; 2332 b->hd = 0; 2333 b->ht_size = 0; 2334 b->ht_flag = PETSC_FALSE; 2335 b->ht_fact = 0; 2336 b->ht_total_ct = 0; 2337 b->ht_insert_ct = 0; 2338 2339 /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */ 2340 b->ijonly = PETSC_FALSE; 2341 2342 b->in_loc = 0; 2343 b->v_loc = 0; 2344 b->n_loc = 0; 2345 2346 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ);CHKERRQ(ierr); 2347 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr); 2348 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr); 2349 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr); 2350 #if defined(PETSC_HAVE_ELEMENTAL) 2351 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_elemental_C",MatConvert_MPISBAIJ_Elemental);CHKERRQ(ierr); 2352 #endif 2353 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpiaij_C",MatConvert_MPISBAIJ_XAIJ);CHKERRQ(ierr); 2354 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpibaij_C",MatConvert_MPISBAIJ_XAIJ);CHKERRQ(ierr); 2355 2356 B->symmetric = PETSC_TRUE; 2357 B->structurally_symmetric = PETSC_TRUE; 2358 B->symmetric_set = PETSC_TRUE; 2359 B->structurally_symmetric_set = PETSC_TRUE; 2360 B->symmetric_eternal = PETSC_TRUE; 2361 2362 B->hermitian = PETSC_FALSE; 2363 B->hermitian_set = PETSC_FALSE; 2364 2365 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr); 2366 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr); 2367 ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",flg,&flg,NULL);CHKERRQ(ierr); 2368 if (flg) { 2369 PetscReal fact = 1.39; 2370 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 2371 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr); 2372 if (fact <= 1.0) fact = 1.39; 2373 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 2374 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 2375 } 2376 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2377 PetscFunctionReturn(0); 2378 } 2379 2380 /*MC 2381 MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices. 2382 2383 This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator, 2384 and MATMPISBAIJ otherwise. 2385 2386 Options Database Keys: 2387 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions() 2388 2389 Level: beginner 2390 2391 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ 2392 M*/ 2393 2394 /*@C 2395 MatMPISBAIJSetPreallocation - For good matrix assembly performance 2396 the user should preallocate the matrix storage by setting the parameters 2397 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2398 performance can be increased by more than a factor of 50. 2399 2400 Collective on Mat 2401 2402 Input Parameters: 2403 + B - the matrix 2404 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 2405 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 2406 . d_nz - number of block nonzeros per block row in diagonal portion of local 2407 submatrix (same for all local rows) 2408 . d_nnz - array containing the number of block nonzeros in the various block rows 2409 in the upper triangular and diagonal part of the in diagonal portion of the local 2410 (possibly different for each block row) or NULL. If you plan to factor the matrix you must leave room 2411 for the diagonal entry and set a value even if it is zero. 2412 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2413 submatrix (same for all local rows). 2414 - o_nnz - array containing the number of nonzeros in the various block rows of the 2415 off-diagonal portion of the local submatrix that is right of the diagonal 2416 (possibly different for each block row) or NULL. 2417 2418 2419 Options Database Keys: 2420 . -mat_no_unroll - uses code that does not unroll the loops in the 2421 block calculations (much slower) 2422 . -mat_block_size - size of the blocks to use 2423 2424 Notes: 2425 2426 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2427 than it must be used on all processors that share the object for that argument. 2428 2429 If the *_nnz parameter is given then the *_nz parameter is ignored 2430 2431 Storage Information: 2432 For a square global matrix we define each processor's diagonal portion 2433 to be its local rows and the corresponding columns (a square submatrix); 2434 each processor's off-diagonal portion encompasses the remainder of the 2435 local matrix (a rectangular submatrix). 2436 2437 The user can specify preallocated storage for the diagonal part of 2438 the local submatrix with either d_nz or d_nnz (not both). Set 2439 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 2440 memory allocation. Likewise, specify preallocated storage for the 2441 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2442 2443 You can call MatGetInfo() to get information on how effective the preallocation was; 2444 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 2445 You can also run with the option -info and look for messages with the string 2446 malloc in them to see if additional memory allocation was needed. 2447 2448 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2449 the figure below we depict these three local rows and all columns (0-11). 2450 2451 .vb 2452 0 1 2 3 4 5 6 7 8 9 10 11 2453 -------------------------- 2454 row 3 |. . . d d d o o o o o o 2455 row 4 |. . . d d d o o o o o o 2456 row 5 |. . . d d d o o o o o o 2457 -------------------------- 2458 .ve 2459 2460 Thus, any entries in the d locations are stored in the d (diagonal) 2461 submatrix, and any entries in the o locations are stored in the 2462 o (off-diagonal) submatrix. Note that the d matrix is stored in 2463 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 2464 2465 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 2466 plus the diagonal part of the d matrix, 2467 and o_nz should indicate the number of block nonzeros per row in the o matrix 2468 2469 In general, for PDE problems in which most nonzeros are near the diagonal, 2470 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2471 or you will get TERRIBLE performance; see the users' manual chapter on 2472 matrices. 2473 2474 Level: intermediate 2475 2476 .keywords: matrix, block, aij, compressed row, sparse, parallel 2477 2478 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership() 2479 @*/ 2480 PetscErrorCode MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2481 { 2482 PetscErrorCode ierr; 2483 2484 PetscFunctionBegin; 2485 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2486 PetscValidType(B,1); 2487 PetscValidLogicalCollectiveInt(B,bs,2); 2488 ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 2489 PetscFunctionReturn(0); 2490 } 2491 2492 /*@C 2493 MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format 2494 (block compressed row). For good matrix assembly performance 2495 the user should preallocate the matrix storage by setting the parameters 2496 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2497 performance can be increased by more than a factor of 50. 2498 2499 Collective on MPI_Comm 2500 2501 Input Parameters: 2502 + comm - MPI communicator 2503 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 2504 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 2505 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2506 This value should be the same as the local size used in creating the 2507 y vector for the matrix-vector product y = Ax. 2508 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 2509 This value should be the same as the local size used in creating the 2510 x vector for the matrix-vector product y = Ax. 2511 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2512 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2513 . d_nz - number of block nonzeros per block row in diagonal portion of local 2514 submatrix (same for all local rows) 2515 . d_nnz - array containing the number of block nonzeros in the various block rows 2516 in the upper triangular portion of the in diagonal portion of the local 2517 (possibly different for each block block row) or NULL. 2518 If you plan to factor the matrix you must leave room for the diagonal entry and 2519 set its value even if it is zero. 2520 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2521 submatrix (same for all local rows). 2522 - o_nnz - array containing the number of nonzeros in the various block rows of the 2523 off-diagonal portion of the local submatrix (possibly different for 2524 each block row) or NULL. 2525 2526 Output Parameter: 2527 . A - the matrix 2528 2529 Options Database Keys: 2530 . -mat_no_unroll - uses code that does not unroll the loops in the 2531 block calculations (much slower) 2532 . -mat_block_size - size of the blocks to use 2533 . -mat_mpi - use the parallel matrix data structures even on one processor 2534 (defaults to using SeqBAIJ format on one processor) 2535 2536 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 2537 MatXXXXSetPreallocation() paradgm instead of this routine directly. 2538 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 2539 2540 Notes: 2541 The number of rows and columns must be divisible by blocksize. 2542 This matrix type does not support complex Hermitian operation. 2543 2544 The user MUST specify either the local or global matrix dimensions 2545 (possibly both). 2546 2547 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2548 than it must be used on all processors that share the object for that argument. 2549 2550 If the *_nnz parameter is given then the *_nz parameter is ignored 2551 2552 Storage Information: 2553 For a square global matrix we define each processor's diagonal portion 2554 to be its local rows and the corresponding columns (a square submatrix); 2555 each processor's off-diagonal portion encompasses the remainder of the 2556 local matrix (a rectangular submatrix). 2557 2558 The user can specify preallocated storage for the diagonal part of 2559 the local submatrix with either d_nz or d_nnz (not both). Set 2560 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 2561 memory allocation. Likewise, specify preallocated storage for the 2562 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2563 2564 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2565 the figure below we depict these three local rows and all columns (0-11). 2566 2567 .vb 2568 0 1 2 3 4 5 6 7 8 9 10 11 2569 -------------------------- 2570 row 3 |. . . d d d o o o o o o 2571 row 4 |. . . d d d o o o o o o 2572 row 5 |. . . d d d o o o o o o 2573 -------------------------- 2574 .ve 2575 2576 Thus, any entries in the d locations are stored in the d (diagonal) 2577 submatrix, and any entries in the o locations are stored in the 2578 o (off-diagonal) submatrix. Note that the d matrix is stored in 2579 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 2580 2581 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 2582 plus the diagonal part of the d matrix, 2583 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2584 In general, for PDE problems in which most nonzeros are near the diagonal, 2585 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2586 or you will get TERRIBLE performance; see the users' manual chapter on 2587 matrices. 2588 2589 Level: intermediate 2590 2591 .keywords: matrix, block, aij, compressed row, sparse, parallel 2592 2593 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ() 2594 @*/ 2595 2596 PetscErrorCode MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2597 { 2598 PetscErrorCode ierr; 2599 PetscMPIInt size; 2600 2601 PetscFunctionBegin; 2602 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2603 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2604 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2605 if (size > 1) { 2606 ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr); 2607 ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2608 } else { 2609 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 2610 ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2611 } 2612 PetscFunctionReturn(0); 2613 } 2614 2615 2616 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2617 { 2618 Mat mat; 2619 Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data; 2620 PetscErrorCode ierr; 2621 PetscInt len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs; 2622 PetscScalar *array; 2623 2624 PetscFunctionBegin; 2625 *newmat = 0; 2626 2627 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2628 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2629 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2630 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2631 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2632 2633 mat->factortype = matin->factortype; 2634 mat->preallocated = PETSC_TRUE; 2635 mat->assembled = PETSC_TRUE; 2636 mat->insertmode = NOT_SET_VALUES; 2637 2638 a = (Mat_MPISBAIJ*)mat->data; 2639 a->bs2 = oldmat->bs2; 2640 a->mbs = oldmat->mbs; 2641 a->nbs = oldmat->nbs; 2642 a->Mbs = oldmat->Mbs; 2643 a->Nbs = oldmat->Nbs; 2644 2645 a->size = oldmat->size; 2646 a->rank = oldmat->rank; 2647 a->donotstash = oldmat->donotstash; 2648 a->roworiented = oldmat->roworiented; 2649 a->rowindices = 0; 2650 a->rowvalues = 0; 2651 a->getrowactive = PETSC_FALSE; 2652 a->barray = 0; 2653 a->rstartbs = oldmat->rstartbs; 2654 a->rendbs = oldmat->rendbs; 2655 a->cstartbs = oldmat->cstartbs; 2656 a->cendbs = oldmat->cendbs; 2657 2658 /* hash table stuff */ 2659 a->ht = 0; 2660 a->hd = 0; 2661 a->ht_size = 0; 2662 a->ht_flag = oldmat->ht_flag; 2663 a->ht_fact = oldmat->ht_fact; 2664 a->ht_total_ct = 0; 2665 a->ht_insert_ct = 0; 2666 2667 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr); 2668 if (oldmat->colmap) { 2669 #if defined(PETSC_USE_CTABLE) 2670 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2671 #else 2672 ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr); 2673 ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2674 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2675 #endif 2676 } else a->colmap = 0; 2677 2678 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2679 ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr); 2680 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2681 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 2682 } else a->garray = 0; 2683 2684 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 2685 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2686 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2687 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2688 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2689 2690 ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr); 2691 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr); 2692 ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr); 2693 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr); 2694 2695 ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr); 2696 ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr); 2697 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr); 2698 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr); 2699 ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr); 2700 ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr); 2701 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr); 2702 ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr); 2703 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr); 2704 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr); 2705 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b);CHKERRQ(ierr); 2706 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a);CHKERRQ(ierr); 2707 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b);CHKERRQ(ierr); 2708 2709 /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */ 2710 ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr); 2711 a->sMvctx = oldmat->sMvctx; 2712 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx);CHKERRQ(ierr); 2713 2714 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2715 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2716 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2717 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2718 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2719 *newmat = mat; 2720 PetscFunctionReturn(0); 2721 } 2722 2723 PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer) 2724 { 2725 PetscErrorCode ierr; 2726 PetscInt i,nz,j,rstart,rend; 2727 PetscScalar *vals,*buf; 2728 MPI_Comm comm; 2729 MPI_Status status; 2730 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,mmbs; 2731 PetscInt header[4],*rowlengths = 0,M,N,m,*cols,*locrowlens; 2732 PetscInt *procsnz = 0,jj,*mycols,*ibuf; 2733 PetscInt bs = newmat->rmap->bs,Mbs,mbs,extra_rows; 2734 PetscInt *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2735 PetscInt dcount,kmax,k,nzcount,tmp; 2736 int fd; 2737 PetscBool isbinary; 2738 2739 PetscFunctionBegin; 2740 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 2741 if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)newmat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newmat)->type_name); 2742 2743 /* force binary viewer to load .info file if it has not yet done so */ 2744 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2745 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 2746 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr); 2747 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 2748 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2749 if (bs < 0) bs = 1; 2750 2751 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2752 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2753 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2754 if (!rank) { 2755 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 2756 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2757 if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newmat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ"); 2758 } 2759 2760 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2761 M = header[1]; 2762 N = header[2]; 2763 2764 /* If global sizes are set, check if they are consistent with that given in the file */ 2765 if (newmat->rmap->N >= 0 && newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newmat->rmap->N,M); 2766 if (newmat->cmap->N >= 0 && newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newmat->cmap->N,N); 2767 2768 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 2769 2770 /* 2771 This code adds extra rows to make sure the number of rows is 2772 divisible by the blocksize 2773 */ 2774 Mbs = M/bs; 2775 extra_rows = bs - M + bs*(Mbs); 2776 if (extra_rows == bs) extra_rows = 0; 2777 else Mbs++; 2778 if (extra_rows &&!rank) { 2779 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 2780 } 2781 2782 /* determine ownership of all rows */ 2783 if (newmat->rmap->n < 0) { /* PETSC_DECIDE */ 2784 mbs = Mbs/size + ((Mbs % size) > rank); 2785 m = mbs*bs; 2786 } else { /* User Set */ 2787 m = newmat->rmap->n; 2788 mbs = m/bs; 2789 } 2790 ierr = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr); 2791 ierr = PetscMPIIntCast(mbs,&mmbs);CHKERRQ(ierr); 2792 ierr = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2793 rowners[0] = 0; 2794 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2795 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2796 rstart = rowners[rank]; 2797 rend = rowners[rank+1]; 2798 2799 /* distribute row lengths to all processors */ 2800 ierr = PetscMalloc1((rend-rstart)*bs,&locrowlens);CHKERRQ(ierr); 2801 if (!rank) { 2802 ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr); 2803 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2804 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2805 ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr); 2806 for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2807 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2808 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2809 } else { 2810 ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2811 } 2812 2813 if (!rank) { /* procs[0] */ 2814 /* calculate the number of nonzeros on each processor */ 2815 ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr); 2816 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2817 for (i=0; i<size; i++) { 2818 for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 2819 procsnz[i] += rowlengths[j]; 2820 } 2821 } 2822 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2823 2824 /* determine max buffer needed and allocate it */ 2825 maxnz = 0; 2826 for (i=0; i<size; i++) { 2827 maxnz = PetscMax(maxnz,procsnz[i]); 2828 } 2829 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 2830 2831 /* read in my part of the matrix column indices */ 2832 nz = procsnz[0]; 2833 ierr = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr); 2834 mycols = ibuf; 2835 if (size == 1) nz -= extra_rows; 2836 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2837 if (size == 1) { 2838 for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i; 2839 } 2840 2841 /* read in every ones (except the last) and ship off */ 2842 for (i=1; i<size-1; i++) { 2843 nz = procsnz[i]; 2844 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2845 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2846 } 2847 /* read in the stuff for the last proc */ 2848 if (size != 1) { 2849 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2850 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2851 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2852 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 2853 } 2854 ierr = PetscFree(cols);CHKERRQ(ierr); 2855 } else { /* procs[i], i>0 */ 2856 /* determine buffer space needed for message */ 2857 nz = 0; 2858 for (i=0; i<m; i++) nz += locrowlens[i]; 2859 ierr = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr); 2860 mycols = ibuf; 2861 /* receive message of column indices*/ 2862 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2863 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2864 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2865 } 2866 2867 /* loop over local rows, determining number of off diagonal entries */ 2868 ierr = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr); 2869 ierr = PetscMalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr); 2870 ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2871 ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2872 ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2873 rowcount = 0; 2874 nzcount = 0; 2875 for (i=0; i<mbs; i++) { 2876 dcount = 0; 2877 odcount = 0; 2878 for (j=0; j<bs; j++) { 2879 kmax = locrowlens[rowcount]; 2880 for (k=0; k<kmax; k++) { 2881 tmp = mycols[nzcount++]/bs; /* block col. index */ 2882 if (!mask[tmp]) { 2883 mask[tmp] = 1; 2884 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */ 2885 else masked1[dcount++] = tmp; /* entry in diag portion */ 2886 } 2887 } 2888 rowcount++; 2889 } 2890 2891 dlens[i] = dcount; /* d_nzz[i] */ 2892 odlens[i] = odcount; /* o_nzz[i] */ 2893 2894 /* zero out the mask elements we set */ 2895 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2896 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2897 } 2898 ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 2899 ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr); 2900 ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 2901 2902 if (!rank) { 2903 ierr = PetscMalloc1(maxnz,&buf);CHKERRQ(ierr); 2904 /* read in my part of the matrix numerical values */ 2905 nz = procsnz[0]; 2906 vals = buf; 2907 mycols = ibuf; 2908 if (size == 1) nz -= extra_rows; 2909 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2910 if (size == 1) { 2911 for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0; 2912 } 2913 2914 /* insert into matrix */ 2915 jj = rstart*bs; 2916 for (i=0; i<m; i++) { 2917 ierr = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2918 mycols += locrowlens[i]; 2919 vals += locrowlens[i]; 2920 jj++; 2921 } 2922 2923 /* read in other processors (except the last one) and ship out */ 2924 for (i=1; i<size-1; i++) { 2925 nz = procsnz[i]; 2926 vals = buf; 2927 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2928 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 2929 } 2930 /* the last proc */ 2931 if (size != 1) { 2932 nz = procsnz[i] - extra_rows; 2933 vals = buf; 2934 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2935 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2936 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 2937 } 2938 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2939 2940 } else { 2941 /* receive numeric values */ 2942 ierr = PetscMalloc1(nz,&buf);CHKERRQ(ierr); 2943 2944 /* receive message of values*/ 2945 vals = buf; 2946 mycols = ibuf; 2947 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr); 2948 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2949 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2950 2951 /* insert into matrix */ 2952 jj = rstart*bs; 2953 for (i=0; i<m; i++) { 2954 ierr = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2955 mycols += locrowlens[i]; 2956 vals += locrowlens[i]; 2957 jj++; 2958 } 2959 } 2960 2961 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2962 ierr = PetscFree(buf);CHKERRQ(ierr); 2963 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2964 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 2965 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 2966 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 2967 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2968 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2969 PetscFunctionReturn(0); 2970 } 2971 2972 /*XXXXX@ 2973 MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2974 2975 Input Parameters: 2976 . mat - the matrix 2977 . fact - factor 2978 2979 Not Collective on Mat, each process can have a different hash factor 2980 2981 Level: advanced 2982 2983 Notes: 2984 This can also be set by the command line option: -mat_use_hash_table fact 2985 2986 .keywords: matrix, hashtable, factor, HT 2987 2988 .seealso: MatSetOption() 2989 @XXXXX*/ 2990 2991 2992 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[]) 2993 { 2994 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 2995 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(a->B)->data; 2996 PetscReal atmp; 2997 PetscReal *work,*svalues,*rvalues; 2998 PetscErrorCode ierr; 2999 PetscInt i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol; 3000 PetscMPIInt rank,size; 3001 PetscInt *rowners_bs,dest,count,source; 3002 PetscScalar *va; 3003 MatScalar *ba; 3004 MPI_Status stat; 3005 3006 PetscFunctionBegin; 3007 if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov"); 3008 ierr = MatGetRowMaxAbs(a->A,v,NULL);CHKERRQ(ierr); 3009 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 3010 3011 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 3012 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 3013 3014 bs = A->rmap->bs; 3015 mbs = a->mbs; 3016 Mbs = a->Mbs; 3017 ba = b->a; 3018 bi = b->i; 3019 bj = b->j; 3020 3021 /* find ownerships */ 3022 rowners_bs = A->rmap->range; 3023 3024 /* each proc creates an array to be distributed */ 3025 ierr = PetscMalloc1(bs*Mbs,&work);CHKERRQ(ierr); 3026 ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr); 3027 3028 /* row_max for B */ 3029 if (rank != size-1) { 3030 for (i=0; i<mbs; i++) { 3031 ncols = bi[1] - bi[0]; bi++; 3032 brow = bs*i; 3033 for (j=0; j<ncols; j++) { 3034 bcol = bs*(*bj); 3035 for (kcol=0; kcol<bs; kcol++) { 3036 col = bcol + kcol; /* local col index */ 3037 col += rowners_bs[rank+1]; /* global col index */ 3038 for (krow=0; krow<bs; krow++) { 3039 atmp = PetscAbsScalar(*ba); ba++; 3040 row = brow + krow; /* local row index */ 3041 if (PetscRealPart(va[row]) < atmp) va[row] = atmp; 3042 if (work[col] < atmp) work[col] = atmp; 3043 } 3044 } 3045 bj++; 3046 } 3047 } 3048 3049 /* send values to its owners */ 3050 for (dest=rank+1; dest<size; dest++) { 3051 svalues = work + rowners_bs[dest]; 3052 count = rowners_bs[dest+1]-rowners_bs[dest]; 3053 ierr = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 3054 } 3055 } 3056 3057 /* receive values */ 3058 if (rank) { 3059 rvalues = work; 3060 count = rowners_bs[rank+1]-rowners_bs[rank]; 3061 for (source=0; source<rank; source++) { 3062 ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat);CHKERRQ(ierr); 3063 /* process values */ 3064 for (i=0; i<count; i++) { 3065 if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i]; 3066 } 3067 } 3068 } 3069 3070 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 3071 ierr = PetscFree(work);CHKERRQ(ierr); 3072 PetscFunctionReturn(0); 3073 } 3074 3075 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 3076 { 3077 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 3078 PetscErrorCode ierr; 3079 PetscInt mbs=mat->mbs,bs=matin->rmap->bs; 3080 PetscScalar *x,*ptr,*from; 3081 Vec bb1; 3082 const PetscScalar *b; 3083 3084 PetscFunctionBegin; 3085 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 3086 if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 3087 3088 if (flag == SOR_APPLY_UPPER) { 3089 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 3090 PetscFunctionReturn(0); 3091 } 3092 3093 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 3094 if (flag & SOR_ZERO_INITIAL_GUESS) { 3095 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 3096 its--; 3097 } 3098 3099 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 3100 while (its--) { 3101 3102 /* lower triangular part: slvec0b = - B^T*xx */ 3103 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 3104 3105 /* copy xx into slvec0a */ 3106 ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr); 3107 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 3108 ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 3109 ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr); 3110 3111 ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr); 3112 3113 /* copy bb into slvec1a */ 3114 ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr); 3115 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 3116 ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 3117 ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr); 3118 3119 /* set slvec1b = 0 */ 3120 ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr); 3121 3122 ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3123 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 3124 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 3125 ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3126 3127 /* upper triangular part: bb1 = bb1 - B*x */ 3128 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr); 3129 3130 /* local diagonal sweep */ 3131 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 3132 } 3133 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 3134 } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 3135 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 3136 } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 3137 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 3138 } else if (flag & SOR_EISENSTAT) { 3139 Vec xx1; 3140 PetscBool hasop; 3141 const PetscScalar *diag; 3142 PetscScalar *sl,scale = (omega - 2.0)/omega; 3143 PetscInt i,n; 3144 3145 if (!mat->xx1) { 3146 ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr); 3147 ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr); 3148 } 3149 xx1 = mat->xx1; 3150 bb1 = mat->bb1; 3151 3152 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 3153 3154 if (!mat->diag) { 3155 /* this is wrong for same matrix with new nonzero values */ 3156 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 3157 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 3158 } 3159 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 3160 3161 if (hasop) { 3162 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 3163 ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr); 3164 } else { 3165 /* 3166 These two lines are replaced by code that may be a bit faster for a good compiler 3167 ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr); 3168 ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr); 3169 */ 3170 ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr); 3171 ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr); 3172 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 3173 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 3174 ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr); 3175 if (omega == 1.0) { 3176 for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i]; 3177 ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr); 3178 } else { 3179 for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i]; 3180 ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr); 3181 } 3182 ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr); 3183 ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr); 3184 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 3185 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 3186 } 3187 3188 /* multiply off-diagonal portion of matrix */ 3189 ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr); 3190 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 3191 ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr); 3192 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 3193 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 3194 ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr); 3195 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 3196 ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3197 ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3198 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr); 3199 3200 /* local sweep */ 3201 ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 3202 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 3203 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 3204 PetscFunctionReturn(0); 3205 } 3206 3207 PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 3208 { 3209 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 3210 PetscErrorCode ierr; 3211 Vec lvec1,bb1; 3212 3213 PetscFunctionBegin; 3214 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 3215 if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 3216 3217 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 3218 if (flag & SOR_ZERO_INITIAL_GUESS) { 3219 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 3220 its--; 3221 } 3222 3223 ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr); 3224 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 3225 while (its--) { 3226 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3227 3228 /* lower diagonal part: bb1 = bb - B^T*xx */ 3229 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr); 3230 ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr); 3231 3232 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3233 ierr = VecCopy(bb,bb1);CHKERRQ(ierr); 3234 ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3235 3236 /* upper diagonal part: bb1 = bb1 - B*x */ 3237 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 3238 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 3239 3240 ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3241 3242 /* diagonal sweep */ 3243 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 3244 } 3245 ierr = VecDestroy(&lvec1);CHKERRQ(ierr); 3246 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 3247 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 3248 PetscFunctionReturn(0); 3249 } 3250 3251 /*@ 3252 MatCreateMPISBAIJWithArrays - creates a MPI SBAIJ matrix using arrays that contain in standard 3253 CSR format the local rows. 3254 3255 Collective on MPI_Comm 3256 3257 Input Parameters: 3258 + comm - MPI communicator 3259 . bs - the block size, only a block size of 1 is supported 3260 . m - number of local rows (Cannot be PETSC_DECIDE) 3261 . n - This value should be the same as the local size used in creating the 3262 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3263 calculated if N is given) For square matrices n is almost always m. 3264 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3265 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3266 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that row block row of the matrix 3267 . j - column indices 3268 - a - matrix values 3269 3270 Output Parameter: 3271 . mat - the matrix 3272 3273 Level: intermediate 3274 3275 Notes: 3276 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3277 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3278 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3279 3280 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3281 3282 .keywords: matrix, aij, compressed row, sparse, parallel 3283 3284 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3285 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 3286 @*/ 3287 PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3288 { 3289 PetscErrorCode ierr; 3290 3291 3292 PetscFunctionBegin; 3293 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3294 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3295 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3296 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3297 ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr); 3298 ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr); 3299 PetscFunctionReturn(0); 3300 } 3301 3302 3303 /*@C 3304 MatMPISBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format 3305 (the default parallel PETSc format). 3306 3307 Collective on MPI_Comm 3308 3309 Input Parameters: 3310 + B - the matrix 3311 . bs - the block size 3312 . i - the indices into j for the start of each local row (starts with zero) 3313 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3314 - v - optional values in the matrix 3315 3316 Level: developer 3317 3318 .keywords: matrix, aij, compressed row, sparse, parallel 3319 3320 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ 3321 @*/ 3322 PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3323 { 3324 PetscErrorCode ierr; 3325 3326 PetscFunctionBegin; 3327 ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 3328 PetscFunctionReturn(0); 3329 } 3330 3331 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3332 { 3333 PetscErrorCode ierr; 3334 PetscInt m,N,i,rstart,nnz,Ii,bs,cbs; 3335 PetscInt *indx; 3336 PetscScalar *values; 3337 3338 PetscFunctionBegin; 3339 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 3340 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 3341 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inmat->data; 3342 PetscInt *dnz,*onz,sum,bs,cbs,mbs,Nbs; 3343 PetscInt *bindx,rmax=a->rmax,j; 3344 3345 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 3346 mbs = m/bs; Nbs = N/cbs; 3347 if (n == PETSC_DECIDE) { 3348 ierr = PetscSplitOwnership(comm,&n,&Nbs);CHKERRQ(ierr); 3349 } 3350 /* Check sum(n) = Nbs */ 3351 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3352 if (sum != Nbs) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",Nbs); 3353 3354 ierr = MPI_Scan(&mbs, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3355 rstart -= mbs; 3356 3357 ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr); 3358 ierr = MatPreallocateInitialize(comm,mbs,n,dnz,onz);CHKERRQ(ierr); 3359 ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 3360 for (i=0; i<mbs; i++) { 3361 ierr = MatGetRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */ 3362 nnz = nnz/bs; 3363 for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs; 3364 ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr); 3365 ierr = MatRestoreRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); 3366 } 3367 ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr); 3368 ierr = PetscFree(bindx);CHKERRQ(ierr); 3369 3370 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3371 ierr = MatSetSizes(*outmat,m,n*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3372 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 3373 ierr = MatSetType(*outmat,MATMPISBAIJ);CHKERRQ(ierr); 3374 ierr = MatMPISBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr); 3375 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3376 } 3377 3378 /* numeric phase */ 3379 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 3380 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 3381 3382 ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 3383 for (i=0; i<m; i++) { 3384 ierr = MatGetRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3385 Ii = i + rstart; 3386 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3387 ierr = MatRestoreRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3388 } 3389 ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr); 3390 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3391 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3392 PetscFunctionReturn(0); 3393 } 3394