1 2 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/ 3 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 4 #include <../src/mat/impls/sbaij/seq/sbaij.h> 5 #include <petscblaslapack.h> 6 7 #if defined(PETSC_HAVE_ELEMENTAL) 8 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 9 #endif 10 11 /* This could be moved to matimpl.h */ 12 static PetscErrorCode MatPreallocateWithMats_Private(Mat B, PetscInt nm, Mat X[], PetscBool symm[], PetscBool fill) 13 { 14 Mat preallocator; 15 PetscInt r,rstart,rend; 16 PetscInt bs,i,m,n,M,N; 17 PetscBool cong = PETSC_TRUE; 18 PetscErrorCode ierr; 19 20 PetscFunctionBegin; 21 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 22 PetscValidLogicalCollectiveInt(B,nm,2); 23 for (i = 0; i < nm; i++) { 24 PetscValidHeaderSpecific(X[i],MAT_CLASSID,3); 25 ierr = PetscLayoutCompare(B->rmap,X[i]->rmap,&cong);CHKERRQ(ierr); 26 if (!cong) SETERRQ(PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"Not for different layouts"); 27 } 28 PetscValidLogicalCollectiveBool(B,fill,5); 29 ierr = MatGetBlockSize(B,&bs);CHKERRQ(ierr); 30 ierr = MatGetSize(B,&M,&N);CHKERRQ(ierr); 31 ierr = MatGetLocalSize(B,&m,&n);CHKERRQ(ierr); 32 ierr = MatCreate(PetscObjectComm((PetscObject)B),&preallocator);CHKERRQ(ierr); 33 ierr = MatSetType(preallocator,MATPREALLOCATOR);CHKERRQ(ierr); 34 ierr = MatSetBlockSize(preallocator,bs);CHKERRQ(ierr); 35 ierr = MatSetSizes(preallocator,m,n,M,N);CHKERRQ(ierr); 36 ierr = MatSetUp(preallocator);CHKERRQ(ierr); 37 ierr = MatGetOwnershipRange(preallocator,&rstart,&rend);CHKERRQ(ierr); 38 for (r = rstart; r < rend; ++r) { 39 PetscInt ncols; 40 const PetscInt *row; 41 const PetscScalar *vals; 42 43 for (i = 0; i < nm; i++) { 44 ierr = MatGetRow(X[i],r,&ncols,&row,&vals);CHKERRQ(ierr); 45 ierr = MatSetValues(preallocator,1,&r,ncols,row,vals,INSERT_VALUES);CHKERRQ(ierr); 46 if (symm && symm[i]) { 47 ierr = MatSetValues(preallocator,ncols,row,1,&r,vals,INSERT_VALUES);CHKERRQ(ierr); 48 } 49 ierr = MatRestoreRow(X[i],r,&ncols,&row,&vals);CHKERRQ(ierr); 50 } 51 } 52 ierr = MatAssemblyBegin(preallocator,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 53 ierr = MatAssemblyEnd(preallocator,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 54 ierr = MatPreallocatorPreallocate(preallocator,fill,B);CHKERRQ(ierr); 55 ierr = MatDestroy(&preallocator);CHKERRQ(ierr); 56 PetscFunctionReturn(0); 57 } 58 59 static PetscErrorCode MatConvert_MPISBAIJ_XAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 60 { 61 Mat B; 62 PetscErrorCode ierr; 63 PetscInt r; 64 65 PetscFunctionBegin; 66 if (reuse != MAT_REUSE_MATRIX) { 67 PetscBool symm = PETSC_TRUE; 68 PetscInt bs; 69 70 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 71 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 72 ierr = MatSetType(B,newtype);CHKERRQ(ierr); 73 ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr); 74 ierr = MatSetBlockSize(B,bs);CHKERRQ(ierr); 75 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 76 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 77 ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr); 78 ierr = MatPreallocateWithMats_Private(B,1,&A,&symm,PETSC_TRUE);CHKERRQ(ierr); 79 ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr); 80 } else B = *newmat; 81 82 ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr); 83 for (r = A->rmap->rstart; r < A->rmap->rend; r++) { 84 PetscInt ncols; 85 const PetscInt *row; 86 const PetscScalar *vals; 87 88 ierr = MatGetRow(A,r,&ncols,&row,&vals);CHKERRQ(ierr); 89 ierr = MatSetValues(B,1,&r,ncols,row,vals,INSERT_VALUES);CHKERRQ(ierr); 90 ierr = MatSetValues(B,ncols,row,1,&r,vals,INSERT_VALUES);CHKERRQ(ierr); 91 ierr = MatRestoreRow(A,r,&ncols,&row,&vals);CHKERRQ(ierr); 92 } 93 ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr); 94 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 95 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 96 97 if (reuse == MAT_INPLACE_MATRIX) { 98 ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr); 99 } else { 100 *newmat = B; 101 } 102 PetscFunctionReturn(0); 103 } 104 105 PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat) 106 { 107 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ*)mat->data; 108 PetscErrorCode ierr; 109 110 PetscFunctionBegin; 111 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 112 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 113 PetscFunctionReturn(0); 114 } 115 116 PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat) 117 { 118 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ*)mat->data; 119 PetscErrorCode ierr; 120 121 PetscFunctionBegin; 122 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 123 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 124 PetscFunctionReturn(0); 125 } 126 127 #define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,orow,ocol) \ 128 { \ 129 \ 130 brow = row/bs; \ 131 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 132 rmax = aimax[brow]; nrow = ailen[brow]; \ 133 bcol = col/bs; \ 134 ridx = row % bs; cidx = col % bs; \ 135 low = 0; high = nrow; \ 136 while (high-low > 3) { \ 137 t = (low+high)/2; \ 138 if (rp[t] > bcol) high = t; \ 139 else low = t; \ 140 } \ 141 for (_i=low; _i<high; _i++) { \ 142 if (rp[_i] > bcol) break; \ 143 if (rp[_i] == bcol) { \ 144 bap = ap + bs2*_i + bs*cidx + ridx; \ 145 if (addv == ADD_VALUES) *bap += value; \ 146 else *bap = value; \ 147 goto a_noinsert; \ 148 } \ 149 } \ 150 if (a->nonew == 1) goto a_noinsert; \ 151 if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 152 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 153 N = nrow++ - 1; \ 154 /* shift up all the later entries in this row */ \ 155 for (ii=N; ii>=_i; ii--) { \ 156 rp[ii+1] = rp[ii]; \ 157 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 158 } \ 159 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 160 rp[_i] = bcol; \ 161 ap[bs2*_i + bs*cidx + ridx] = value; \ 162 A->nonzerostate++;\ 163 a_noinsert:; \ 164 ailen[brow] = nrow; \ 165 } 166 167 #define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,orow,ocol) \ 168 { \ 169 brow = row/bs; \ 170 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 171 rmax = bimax[brow]; nrow = bilen[brow]; \ 172 bcol = col/bs; \ 173 ridx = row % bs; cidx = col % bs; \ 174 low = 0; high = nrow; \ 175 while (high-low > 3) { \ 176 t = (low+high)/2; \ 177 if (rp[t] > bcol) high = t; \ 178 else low = t; \ 179 } \ 180 for (_i=low; _i<high; _i++) { \ 181 if (rp[_i] > bcol) break; \ 182 if (rp[_i] == bcol) { \ 183 bap = ap + bs2*_i + bs*cidx + ridx; \ 184 if (addv == ADD_VALUES) *bap += value; \ 185 else *bap = value; \ 186 goto b_noinsert; \ 187 } \ 188 } \ 189 if (b->nonew == 1) goto b_noinsert; \ 190 if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 191 MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 192 N = nrow++ - 1; \ 193 /* shift up all the later entries in this row */ \ 194 for (ii=N; ii>=_i; ii--) { \ 195 rp[ii+1] = rp[ii]; \ 196 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 197 } \ 198 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 199 rp[_i] = bcol; \ 200 ap[bs2*_i + bs*cidx + ridx] = value; \ 201 B->nonzerostate++;\ 202 b_noinsert:; \ 203 bilen[brow] = nrow; \ 204 } 205 206 /* Only add/insert a(i,j) with i<=j (blocks). 207 Any a(i,j) with i>j input by user is ingored. 208 */ 209 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 210 { 211 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 212 MatScalar value; 213 PetscBool roworiented = baij->roworiented; 214 PetscErrorCode ierr; 215 PetscInt i,j,row,col; 216 PetscInt rstart_orig=mat->rmap->rstart; 217 PetscInt rend_orig =mat->rmap->rend,cstart_orig=mat->cmap->rstart; 218 PetscInt cend_orig =mat->cmap->rend,bs=mat->rmap->bs; 219 220 /* Some Variables required in the macro */ 221 Mat A = baij->A; 222 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data; 223 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 224 MatScalar *aa =a->a; 225 226 Mat B = baij->B; 227 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 228 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 229 MatScalar *ba =b->a; 230 231 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 232 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 233 MatScalar *ap,*bap; 234 235 /* for stash */ 236 PetscInt n_loc, *in_loc = NULL; 237 MatScalar *v_loc = NULL; 238 239 PetscFunctionBegin; 240 if (!baij->donotstash) { 241 if (n > baij->n_loc) { 242 ierr = PetscFree(baij->in_loc);CHKERRQ(ierr); 243 ierr = PetscFree(baij->v_loc);CHKERRQ(ierr); 244 ierr = PetscMalloc1(n,&baij->in_loc);CHKERRQ(ierr); 245 ierr = PetscMalloc1(n,&baij->v_loc);CHKERRQ(ierr); 246 247 baij->n_loc = n; 248 } 249 in_loc = baij->in_loc; 250 v_loc = baij->v_loc; 251 } 252 253 for (i=0; i<m; i++) { 254 if (im[i] < 0) continue; 255 #if defined(PETSC_USE_DEBUG) 256 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 257 #endif 258 if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */ 259 row = im[i] - rstart_orig; /* local row index */ 260 for (j=0; j<n; j++) { 261 if (im[i]/bs > in[j]/bs) { 262 if (a->ignore_ltriangular) { 263 continue; /* ignore lower triangular blocks */ 264 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 265 } 266 if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */ 267 col = in[j] - cstart_orig; /* local col index */ 268 brow = row/bs; bcol = col/bs; 269 if (brow > bcol) continue; /* ignore lower triangular blocks of A */ 270 if (roworiented) value = v[i*n+j]; 271 else value = v[i+j*m]; 272 MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,im[i],in[j]); 273 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 274 } else if (in[j] < 0) continue; 275 #if defined(PETSC_USE_DEBUG) 276 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 277 #endif 278 else { /* off-diag entry (B) */ 279 if (mat->was_assembled) { 280 if (!baij->colmap) { 281 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 282 } 283 #if defined(PETSC_USE_CTABLE) 284 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 285 col = col - 1; 286 #else 287 col = baij->colmap[in[j]/bs] - 1; 288 #endif 289 if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) { 290 ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 291 col = in[j]; 292 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 293 B = baij->B; 294 b = (Mat_SeqBAIJ*)(B)->data; 295 bimax= b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 296 ba = b->a; 297 } else col += in[j]%bs; 298 } else col = in[j]; 299 if (roworiented) value = v[i*n+j]; 300 else value = v[i+j*m]; 301 MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,im[i],in[j]); 302 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 303 } 304 } 305 } else { /* off processor entry */ 306 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 307 if (!baij->donotstash) { 308 mat->assembled = PETSC_FALSE; 309 n_loc = 0; 310 for (j=0; j<n; j++) { 311 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 312 in_loc[n_loc] = in[j]; 313 if (roworiented) { 314 v_loc[n_loc] = v[i*n+j]; 315 } else { 316 v_loc[n_loc] = v[j*m+i]; 317 } 318 n_loc++; 319 } 320 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr); 321 } 322 } 323 } 324 PetscFunctionReturn(0); 325 } 326 327 PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol) 328 { 329 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 330 PetscErrorCode ierr; 331 PetscInt *rp,low,high,t,ii,jj,nrow,i,rmax,N; 332 PetscInt *imax =a->imax,*ai=a->i,*ailen=a->ilen; 333 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs; 334 PetscBool roworiented=a->roworiented; 335 const PetscScalar *value = v; 336 MatScalar *ap,*aa = a->a,*bap; 337 338 PetscFunctionBegin; 339 if (col < row) { 340 if (a->ignore_ltriangular) PetscFunctionReturn(0); /* ignore lower triangular block */ 341 else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 342 } 343 rp = aj + ai[row]; 344 ap = aa + bs2*ai[row]; 345 rmax = imax[row]; 346 nrow = ailen[row]; 347 value = v; 348 low = 0; 349 high = nrow; 350 351 while (high-low > 7) { 352 t = (low+high)/2; 353 if (rp[t] > col) high = t; 354 else low = t; 355 } 356 for (i=low; i<high; i++) { 357 if (rp[i] > col) break; 358 if (rp[i] == col) { 359 bap = ap + bs2*i; 360 if (roworiented) { 361 if (is == ADD_VALUES) { 362 for (ii=0; ii<bs; ii++) { 363 for (jj=ii; jj<bs2; jj+=bs) { 364 bap[jj] += *value++; 365 } 366 } 367 } else { 368 for (ii=0; ii<bs; ii++) { 369 for (jj=ii; jj<bs2; jj+=bs) { 370 bap[jj] = *value++; 371 } 372 } 373 } 374 } else { 375 if (is == ADD_VALUES) { 376 for (ii=0; ii<bs; ii++) { 377 for (jj=0; jj<bs; jj++) { 378 *bap++ += *value++; 379 } 380 } 381 } else { 382 for (ii=0; ii<bs; ii++) { 383 for (jj=0; jj<bs; jj++) { 384 *bap++ = *value++; 385 } 386 } 387 } 388 } 389 goto noinsert2; 390 } 391 } 392 if (nonew == 1) goto noinsert2; 393 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new block index nonzero block (%D, %D) in the matrix", orow, ocol); 394 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 395 N = nrow++ - 1; high++; 396 /* shift up all the later entries in this row */ 397 for (ii=N; ii>=i; ii--) { 398 rp[ii+1] = rp[ii]; 399 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 400 } 401 if (N >= i) { 402 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 403 } 404 rp[i] = col; 405 bap = ap + bs2*i; 406 if (roworiented) { 407 for (ii=0; ii<bs; ii++) { 408 for (jj=ii; jj<bs2; jj+=bs) { 409 bap[jj] = *value++; 410 } 411 } 412 } else { 413 for (ii=0; ii<bs; ii++) { 414 for (jj=0; jj<bs; jj++) { 415 *bap++ = *value++; 416 } 417 } 418 } 419 noinsert2:; 420 ailen[row] = nrow; 421 PetscFunctionReturn(0); 422 } 423 424 /* 425 This routine is exactly duplicated in mpibaij.c 426 */ 427 PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol) 428 { 429 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 430 PetscInt *rp,low,high,t,ii,jj,nrow,i,rmax,N; 431 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 432 PetscErrorCode ierr; 433 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs; 434 PetscBool roworiented=a->roworiented; 435 const PetscScalar *value = v; 436 MatScalar *ap,*aa = a->a,*bap; 437 438 PetscFunctionBegin; 439 rp = aj + ai[row]; 440 ap = aa + bs2*ai[row]; 441 rmax = imax[row]; 442 nrow = ailen[row]; 443 low = 0; 444 high = nrow; 445 value = v; 446 while (high-low > 7) { 447 t = (low+high)/2; 448 if (rp[t] > col) high = t; 449 else low = t; 450 } 451 for (i=low; i<high; i++) { 452 if (rp[i] > col) break; 453 if (rp[i] == col) { 454 bap = ap + bs2*i; 455 if (roworiented) { 456 if (is == ADD_VALUES) { 457 for (ii=0; ii<bs; ii++) { 458 for (jj=ii; jj<bs2; jj+=bs) { 459 bap[jj] += *value++; 460 } 461 } 462 } else { 463 for (ii=0; ii<bs; ii++) { 464 for (jj=ii; jj<bs2; jj+=bs) { 465 bap[jj] = *value++; 466 } 467 } 468 } 469 } else { 470 if (is == ADD_VALUES) { 471 for (ii=0; ii<bs; ii++,value+=bs) { 472 for (jj=0; jj<bs; jj++) { 473 bap[jj] += value[jj]; 474 } 475 bap += bs; 476 } 477 } else { 478 for (ii=0; ii<bs; ii++,value+=bs) { 479 for (jj=0; jj<bs; jj++) { 480 bap[jj] = value[jj]; 481 } 482 bap += bs; 483 } 484 } 485 } 486 goto noinsert2; 487 } 488 } 489 if (nonew == 1) goto noinsert2; 490 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%D, %D) in the matrix", orow, ocol); 491 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 492 N = nrow++ - 1; high++; 493 /* shift up all the later entries in this row */ 494 for (ii=N; ii>=i; ii--) { 495 rp[ii+1] = rp[ii]; 496 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 497 } 498 if (N >= i) { 499 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 500 } 501 rp[i] = col; 502 bap = ap + bs2*i; 503 if (roworiented) { 504 for (ii=0; ii<bs; ii++) { 505 for (jj=ii; jj<bs2; jj+=bs) { 506 bap[jj] = *value++; 507 } 508 } 509 } else { 510 for (ii=0; ii<bs; ii++) { 511 for (jj=0; jj<bs; jj++) { 512 *bap++ = *value++; 513 } 514 } 515 } 516 noinsert2:; 517 ailen[row] = nrow; 518 PetscFunctionReturn(0); 519 } 520 521 /* 522 This routine could be optimized by removing the need for the block copy below and passing stride information 523 to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ() 524 */ 525 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 526 { 527 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 528 const MatScalar *value; 529 MatScalar *barray =baij->barray; 530 PetscBool roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular; 531 PetscErrorCode ierr; 532 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 533 PetscInt rend=baij->rendbs,cstart=baij->rstartbs,stepval; 534 PetscInt cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2; 535 536 PetscFunctionBegin; 537 if (!barray) { 538 ierr = PetscMalloc1(bs2,&barray);CHKERRQ(ierr); 539 baij->barray = barray; 540 } 541 542 if (roworiented) { 543 stepval = (n-1)*bs; 544 } else { 545 stepval = (m-1)*bs; 546 } 547 for (i=0; i<m; i++) { 548 if (im[i] < 0) continue; 549 #if defined(PETSC_USE_DEBUG) 550 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %D max %D",im[i],baij->Mbs-1); 551 #endif 552 if (im[i] >= rstart && im[i] < rend) { 553 row = im[i] - rstart; 554 for (j=0; j<n; j++) { 555 if (im[i] > in[j]) { 556 if (ignore_ltriangular) continue; /* ignore lower triangular blocks */ 557 else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 558 } 559 /* If NumCol = 1 then a copy is not required */ 560 if ((roworiented) && (n == 1)) { 561 barray = (MatScalar*) v + i*bs2; 562 } else if ((!roworiented) && (m == 1)) { 563 barray = (MatScalar*) v + j*bs2; 564 } else { /* Here a copy is required */ 565 if (roworiented) { 566 value = v + i*(stepval+bs)*bs + j*bs; 567 } else { 568 value = v + j*(stepval+bs)*bs + i*bs; 569 } 570 for (ii=0; ii<bs; ii++,value+=stepval) { 571 for (jj=0; jj<bs; jj++) { 572 *barray++ = *value++; 573 } 574 } 575 barray -=bs2; 576 } 577 578 if (in[j] >= cstart && in[j] < cend) { 579 col = in[j] - cstart; 580 ierr = MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr); 581 } else if (in[j] < 0) continue; 582 #if defined(PETSC_USE_DEBUG) 583 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %D max %D",in[j],baij->Nbs-1); 584 #endif 585 else { 586 if (mat->was_assembled) { 587 if (!baij->colmap) { 588 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 589 } 590 591 #if defined(PETSC_USE_DEBUG) 592 #if defined(PETSC_USE_CTABLE) 593 { PetscInt data; 594 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 595 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 596 } 597 #else 598 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 599 #endif 600 #endif 601 #if defined(PETSC_USE_CTABLE) 602 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 603 col = (col - 1)/bs; 604 #else 605 col = (baij->colmap[in[j]] - 1)/bs; 606 #endif 607 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 608 ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 609 col = in[j]; 610 } 611 } else col = in[j]; 612 ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr); 613 } 614 } 615 } else { 616 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 617 if (!baij->donotstash) { 618 if (roworiented) { 619 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 620 } else { 621 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 622 } 623 } 624 } 625 } 626 PetscFunctionReturn(0); 627 } 628 629 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 630 { 631 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 632 PetscErrorCode ierr; 633 PetscInt bs = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend; 634 PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data; 635 636 PetscFunctionBegin; 637 for (i=0; i<m; i++) { 638 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */ 639 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 640 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 641 row = idxm[i] - bsrstart; 642 for (j=0; j<n; j++) { 643 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */ 644 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 645 if (idxn[j] >= bscstart && idxn[j] < bscend) { 646 col = idxn[j] - bscstart; 647 ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 648 } else { 649 if (!baij->colmap) { 650 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 651 } 652 #if defined(PETSC_USE_CTABLE) 653 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 654 data--; 655 #else 656 data = baij->colmap[idxn[j]/bs]-1; 657 #endif 658 if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 659 else { 660 col = data + idxn[j]%bs; 661 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 662 } 663 } 664 } 665 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 666 } 667 PetscFunctionReturn(0); 668 } 669 670 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm) 671 { 672 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 673 PetscErrorCode ierr; 674 PetscReal sum[2],*lnorm2; 675 676 PetscFunctionBegin; 677 if (baij->size == 1) { 678 ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr); 679 } else { 680 if (type == NORM_FROBENIUS) { 681 ierr = PetscMalloc1(2,&lnorm2);CHKERRQ(ierr); 682 ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr); 683 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++; /* squar power of norm(A) */ 684 ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr); 685 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--; /* squar power of norm(B) */ 686 ierr = MPIU_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 687 *norm = PetscSqrtReal(sum[0] + 2*sum[1]); 688 ierr = PetscFree(lnorm2);CHKERRQ(ierr); 689 } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */ 690 Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data; 691 Mat_SeqBAIJ *bmat=(Mat_SeqBAIJ*)baij->B->data; 692 PetscReal *rsum,*rsum2,vabs; 693 PetscInt *jj,*garray=baij->garray,rstart=baij->rstartbs,nz; 694 PetscInt brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs; 695 MatScalar *v; 696 697 ierr = PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2);CHKERRQ(ierr); 698 ierr = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 699 /* Amat */ 700 v = amat->a; jj = amat->j; 701 for (brow=0; brow<mbs; brow++) { 702 grow = bs*(rstart + brow); 703 nz = amat->i[brow+1] - amat->i[brow]; 704 for (bcol=0; bcol<nz; bcol++) { 705 gcol = bs*(rstart + *jj); jj++; 706 for (col=0; col<bs; col++) { 707 for (row=0; row<bs; row++) { 708 vabs = PetscAbsScalar(*v); v++; 709 rsum[gcol+col] += vabs; 710 /* non-diagonal block */ 711 if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs; 712 } 713 } 714 } 715 ierr = PetscLogFlops(nz*bs*bs);CHKERRQ(ierr); 716 } 717 /* Bmat */ 718 v = bmat->a; jj = bmat->j; 719 for (brow=0; brow<mbs; brow++) { 720 grow = bs*(rstart + brow); 721 nz = bmat->i[brow+1] - bmat->i[brow]; 722 for (bcol=0; bcol<nz; bcol++) { 723 gcol = bs*garray[*jj]; jj++; 724 for (col=0; col<bs; col++) { 725 for (row=0; row<bs; row++) { 726 vabs = PetscAbsScalar(*v); v++; 727 rsum[gcol+col] += vabs; 728 rsum[grow+row] += vabs; 729 } 730 } 731 } 732 ierr = PetscLogFlops(nz*bs*bs);CHKERRQ(ierr); 733 } 734 ierr = MPIU_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 735 *norm = 0.0; 736 for (col=0; col<mat->cmap->N; col++) { 737 if (rsum2[col] > *norm) *norm = rsum2[col]; 738 } 739 ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr); 740 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet"); 741 } 742 PetscFunctionReturn(0); 743 } 744 745 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode) 746 { 747 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 748 PetscErrorCode ierr; 749 PetscInt nstash,reallocs; 750 751 PetscFunctionBegin; 752 if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 753 754 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 755 ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr); 756 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 757 ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 758 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 759 ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 760 PetscFunctionReturn(0); 761 } 762 763 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode) 764 { 765 Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data; 766 Mat_SeqSBAIJ *a =(Mat_SeqSBAIJ*)baij->A->data; 767 PetscErrorCode ierr; 768 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 769 PetscInt *row,*col; 770 PetscBool other_disassembled; 771 PetscMPIInt n; 772 PetscBool r1,r2,r3; 773 MatScalar *val; 774 775 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 776 PetscFunctionBegin; 777 if (!baij->donotstash && !mat->nooffprocentries) { 778 while (1) { 779 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 780 if (!flg) break; 781 782 for (i=0; i<n;) { 783 /* Now identify the consecutive vals belonging to the same row */ 784 for (j=i,rstart=row[j]; j<n; j++) { 785 if (row[j] != rstart) break; 786 } 787 if (j < n) ncols = j-i; 788 else ncols = n-i; 789 /* Now assemble all these values with a single function call */ 790 ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 791 i = j; 792 } 793 } 794 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 795 /* Now process the block-stash. Since the values are stashed column-oriented, 796 set the roworiented flag to column oriented, and after MatSetValues() 797 restore the original flags */ 798 r1 = baij->roworiented; 799 r2 = a->roworiented; 800 r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented; 801 802 baij->roworiented = PETSC_FALSE; 803 a->roworiented = PETSC_FALSE; 804 805 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworinted */ 806 while (1) { 807 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 808 if (!flg) break; 809 810 for (i=0; i<n;) { 811 /* Now identify the consecutive vals belonging to the same row */ 812 for (j=i,rstart=row[j]; j<n; j++) { 813 if (row[j] != rstart) break; 814 } 815 if (j < n) ncols = j-i; 816 else ncols = n-i; 817 ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode);CHKERRQ(ierr); 818 i = j; 819 } 820 } 821 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 822 823 baij->roworiented = r1; 824 a->roworiented = r2; 825 826 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworinted */ 827 } 828 829 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 830 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 831 832 /* determine if any processor has disassembled, if so we must 833 also disassemble ourselfs, in order that we may reassemble. */ 834 /* 835 if nonzero structure of submatrix B cannot change then we know that 836 no processor disassembled thus we can skip this stuff 837 */ 838 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 839 ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 840 if (mat->was_assembled && !other_disassembled) { 841 ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 842 } 843 } 844 845 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 846 ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */ 847 } 848 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 849 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 850 851 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 852 853 baij->rowvalues = 0; 854 855 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 856 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 857 PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate; 858 ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 859 } 860 PetscFunctionReturn(0); 861 } 862 863 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 864 #include <petscdraw.h> 865 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 866 { 867 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 868 PetscErrorCode ierr; 869 PetscInt bs = mat->rmap->bs; 870 PetscMPIInt rank = baij->rank; 871 PetscBool iascii,isdraw; 872 PetscViewer sviewer; 873 PetscViewerFormat format; 874 875 PetscFunctionBegin; 876 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 877 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 878 if (iascii) { 879 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 880 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 881 MatInfo info; 882 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 883 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 884 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 885 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %g\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(double)info.memory);CHKERRQ(ierr); 886 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 887 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 888 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 889 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 890 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 891 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 892 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 893 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 894 PetscFunctionReturn(0); 895 } else if (format == PETSC_VIEWER_ASCII_INFO) { 896 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 897 PetscFunctionReturn(0); 898 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 899 PetscFunctionReturn(0); 900 } 901 } 902 903 if (isdraw) { 904 PetscDraw draw; 905 PetscBool isnull; 906 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 907 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 908 if (isnull) PetscFunctionReturn(0); 909 } 910 911 { 912 /* assemble the entire matrix onto first processor. */ 913 Mat A; 914 Mat_SeqSBAIJ *Aloc; 915 Mat_SeqBAIJ *Bloc; 916 PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 917 MatScalar *a; 918 const char *matname; 919 920 /* Should this be the same type as mat? */ 921 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 922 if (!rank) { 923 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 924 } else { 925 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 926 } 927 ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 928 ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr); 929 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 930 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 931 932 /* copy over the A part */ 933 Aloc = (Mat_SeqSBAIJ*)baij->A->data; 934 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 935 ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr); 936 937 for (i=0; i<mbs; i++) { 938 rvals[0] = bs*(baij->rstartbs + i); 939 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 940 for (j=ai[i]; j<ai[i+1]; j++) { 941 col = (baij->cstartbs+aj[j])*bs; 942 for (k=0; k<bs; k++) { 943 ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 944 col++; 945 a += bs; 946 } 947 } 948 } 949 /* copy over the B part */ 950 Bloc = (Mat_SeqBAIJ*)baij->B->data; 951 ai = Bloc->i; aj = Bloc->j; a = Bloc->a; 952 for (i=0; i<mbs; i++) { 953 954 rvals[0] = bs*(baij->rstartbs + i); 955 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 956 for (j=ai[i]; j<ai[i+1]; j++) { 957 col = baij->garray[aj[j]]*bs; 958 for (k=0; k<bs; k++) { 959 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 960 col++; 961 a += bs; 962 } 963 } 964 } 965 ierr = PetscFree(rvals);CHKERRQ(ierr); 966 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 967 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 968 /* 969 Everyone has to call to draw the matrix since the graphics waits are 970 synchronized across all processors that share the PetscDraw object 971 */ 972 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 973 ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr); 974 if (!rank) { 975 ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,matname);CHKERRQ(ierr); 976 ierr = MatView_SeqSBAIJ(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 977 } 978 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 979 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 980 ierr = MatDestroy(&A);CHKERRQ(ierr); 981 } 982 PetscFunctionReturn(0); 983 } 984 985 static PetscErrorCode MatView_MPISBAIJ_Binary(Mat mat,PetscViewer viewer) 986 { 987 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)mat->data; 988 Mat_SeqSBAIJ *A = (Mat_SeqSBAIJ*)a->A->data; 989 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)a->B->data; 990 PetscErrorCode ierr; 991 PetscInt i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen; 992 PetscInt *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll; 993 int fd; 994 PetscScalar *column_values; 995 FILE *file; 996 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 997 PetscInt message_count,flowcontrolcount; 998 999 PetscFunctionBegin; 1000 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1001 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 1002 nz = bs2*(A->nz + B->nz); 1003 rlen = mat->rmap->n; 1004 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1005 if (!rank) { 1006 header[0] = MAT_FILE_CLASSID; 1007 header[1] = mat->rmap->N; 1008 header[2] = mat->cmap->N; 1009 1010 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1011 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1012 /* get largest number of rows any processor has */ 1013 range = mat->rmap->range; 1014 for (i=1; i<size; i++) { 1015 rlen = PetscMax(rlen,range[i+1] - range[i]); 1016 } 1017 } else { 1018 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1019 } 1020 1021 ierr = PetscMalloc1(rlen/bs,&crow_lens);CHKERRQ(ierr); 1022 /* compute lengths of each row */ 1023 for (i=0; i<a->mbs; i++) { 1024 crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1025 } 1026 /* store the row lengths to the file */ 1027 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1028 if (!rank) { 1029 MPI_Status status; 1030 ierr = PetscMalloc1(rlen,&row_lens);CHKERRQ(ierr); 1031 rlen = (range[1] - range[0])/bs; 1032 for (i=0; i<rlen; i++) { 1033 for (j=0; j<bs; j++) { 1034 row_lens[i*bs+j] = bs*crow_lens[i]; 1035 } 1036 } 1037 ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1038 for (i=1; i<size; i++) { 1039 rlen = (range[i+1] - range[i])/bs; 1040 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1041 ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1042 for (k=0; k<rlen; k++) { 1043 for (j=0; j<bs; j++) { 1044 row_lens[k*bs+j] = bs*crow_lens[k]; 1045 } 1046 } 1047 ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1048 } 1049 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1050 ierr = PetscFree(row_lens);CHKERRQ(ierr); 1051 } else { 1052 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1053 ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1054 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1055 } 1056 ierr = PetscFree(crow_lens);CHKERRQ(ierr); 1057 1058 /* load up the local column indices. Include for all rows not just one for each block row since process 0 does not have the 1059 information needed to make it for each row from a block row. This does require more communication but still not more than 1060 the communication needed for the nonzero values */ 1061 nzmax = nz; /* space a largest processor needs */ 1062 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1063 ierr = PetscMalloc1(nzmax,&column_indices);CHKERRQ(ierr); 1064 cnt = 0; 1065 for (i=0; i<a->mbs; i++) { 1066 pcnt = cnt; 1067 for (j=B->i[i]; j<B->i[i+1]; j++) { 1068 if ((col = garray[B->j[j]]) > cstart) break; 1069 for (l=0; l<bs; l++) { 1070 column_indices[cnt++] = bs*col+l; 1071 } 1072 } 1073 for (k=A->i[i]; k<A->i[i+1]; k++) { 1074 for (l=0; l<bs; l++) { 1075 column_indices[cnt++] = bs*(A->j[k] + cstart)+l; 1076 } 1077 } 1078 for (; j<B->i[i+1]; j++) { 1079 for (l=0; l<bs; l++) { 1080 column_indices[cnt++] = bs*garray[B->j[j]]+l; 1081 } 1082 } 1083 len = cnt - pcnt; 1084 for (k=1; k<bs; k++) { 1085 ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr); 1086 cnt += len; 1087 } 1088 } 1089 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1090 1091 /* store the columns to the file */ 1092 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1093 if (!rank) { 1094 MPI_Status status; 1095 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1096 for (i=1; i<size; i++) { 1097 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1098 ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1099 ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1100 ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1101 } 1102 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1103 } else { 1104 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1105 ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1106 ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1107 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1108 } 1109 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1110 1111 /* load up the numerical values */ 1112 ierr = PetscMalloc1(nzmax,&column_values);CHKERRQ(ierr); 1113 cnt = 0; 1114 for (i=0; i<a->mbs; i++) { 1115 rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]); 1116 for (j=B->i[i]; j<B->i[i+1]; j++) { 1117 if (garray[B->j[j]] > cstart) break; 1118 for (l=0; l<bs; l++) { 1119 for (ll=0; ll<bs; ll++) { 1120 column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll]; 1121 } 1122 } 1123 cnt += bs; 1124 } 1125 for (k=A->i[i]; k<A->i[i+1]; k++) { 1126 for (l=0; l<bs; l++) { 1127 for (ll=0; ll<bs; ll++) { 1128 column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll]; 1129 } 1130 } 1131 cnt += bs; 1132 } 1133 for (; j<B->i[i+1]; j++) { 1134 for (l=0; l<bs; l++) { 1135 for (ll=0; ll<bs; ll++) { 1136 column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll]; 1137 } 1138 } 1139 cnt += bs; 1140 } 1141 cnt += (bs-1)*rlen; 1142 } 1143 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1144 1145 /* store the column values to the file */ 1146 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1147 if (!rank) { 1148 MPI_Status status; 1149 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1150 for (i=1; i<size; i++) { 1151 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1152 ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1153 ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1154 ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1155 } 1156 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1157 } else { 1158 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1159 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1160 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1161 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1162 } 1163 ierr = PetscFree(column_values);CHKERRQ(ierr); 1164 1165 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1166 if (file) { 1167 fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs); 1168 } 1169 PetscFunctionReturn(0); 1170 } 1171 1172 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer) 1173 { 1174 PetscErrorCode ierr; 1175 PetscBool iascii,isdraw,issocket,isbinary; 1176 1177 PetscFunctionBegin; 1178 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1179 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1180 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1181 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1182 if (iascii || isdraw || issocket) { 1183 ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1184 } else if (isbinary) { 1185 ierr = MatView_MPISBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1186 } 1187 PetscFunctionReturn(0); 1188 } 1189 1190 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat) 1191 { 1192 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1193 PetscErrorCode ierr; 1194 1195 PetscFunctionBegin; 1196 #if defined(PETSC_USE_LOG) 1197 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N); 1198 #endif 1199 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1200 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 1201 ierr = MatDestroy(&baij->A);CHKERRQ(ierr); 1202 ierr = MatDestroy(&baij->B);CHKERRQ(ierr); 1203 #if defined(PETSC_USE_CTABLE) 1204 ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr); 1205 #else 1206 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 1207 #endif 1208 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 1209 ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr); 1210 ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr); 1211 ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr); 1212 ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr); 1213 ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr); 1214 ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr); 1215 ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr); 1216 ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr); 1217 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 1218 ierr = PetscFree(baij->barray);CHKERRQ(ierr); 1219 ierr = PetscFree(baij->hd);CHKERRQ(ierr); 1220 ierr = VecDestroy(&baij->diag);CHKERRQ(ierr); 1221 ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr); 1222 ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr); 1223 #if defined(PETSC_USE_REAL_MAT_SINGLE) 1224 ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr); 1225 #endif 1226 ierr = PetscFree(baij->in_loc);CHKERRQ(ierr); 1227 ierr = PetscFree(baij->v_loc);CHKERRQ(ierr); 1228 ierr = PetscFree(baij->rangebs);CHKERRQ(ierr); 1229 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1230 1231 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1232 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1233 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1234 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1235 #if defined(PETSC_HAVE_ELEMENTAL) 1236 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_elemental_C",NULL);CHKERRQ(ierr); 1237 #endif 1238 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpiaij_C",NULL);CHKERRQ(ierr); 1239 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpibaij_C",NULL);CHKERRQ(ierr); 1240 PetscFunctionReturn(0); 1241 } 1242 1243 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy) 1244 { 1245 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1246 PetscErrorCode ierr; 1247 PetscInt nt,mbs=a->mbs,bs=A->rmap->bs; 1248 PetscScalar *from; 1249 const PetscScalar *x; 1250 1251 PetscFunctionBegin; 1252 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1253 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1254 1255 /* diagonal part */ 1256 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 1257 ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr); 1258 1259 /* subdiagonal part */ 1260 ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 1261 1262 /* copy x into the vec slvec0 */ 1263 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 1264 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1265 1266 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 1267 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 1268 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1269 1270 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1271 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1272 /* supperdiagonal part */ 1273 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 1274 PetscFunctionReturn(0); 1275 } 1276 1277 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy) 1278 { 1279 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1280 PetscErrorCode ierr; 1281 PetscInt nt,mbs=a->mbs,bs=A->rmap->bs; 1282 PetscScalar *from; 1283 const PetscScalar *x; 1284 1285 PetscFunctionBegin; 1286 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1287 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1288 1289 /* diagonal part */ 1290 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 1291 ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr); 1292 1293 /* subdiagonal part */ 1294 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 1295 1296 /* copy x into the vec slvec0 */ 1297 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 1298 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1299 1300 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 1301 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 1302 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1303 1304 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1305 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1306 /* supperdiagonal part */ 1307 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 1308 PetscFunctionReturn(0); 1309 } 1310 1311 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy) 1312 { 1313 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1314 PetscErrorCode ierr; 1315 PetscInt nt; 1316 1317 PetscFunctionBegin; 1318 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1319 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1320 1321 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1322 if (nt != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 1323 1324 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1325 /* do diagonal part */ 1326 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1327 /* do supperdiagonal part */ 1328 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1329 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1330 /* do subdiagonal part */ 1331 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1332 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1333 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1334 PetscFunctionReturn(0); 1335 } 1336 1337 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1338 { 1339 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1340 PetscErrorCode ierr; 1341 PetscInt mbs=a->mbs,bs=A->rmap->bs; 1342 PetscScalar *from,zero=0.0; 1343 const PetscScalar *x; 1344 1345 PetscFunctionBegin; 1346 /* 1347 PetscSynchronizedPrintf(PetscObjectComm((PetscObject)A)," MatMultAdd is called ...\n"); 1348 PetscSynchronizedFlush(PetscObjectComm((PetscObject)A),PETSC_STDOUT); 1349 */ 1350 /* diagonal part */ 1351 ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr); 1352 ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr); 1353 1354 /* subdiagonal part */ 1355 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 1356 1357 /* copy x into the vec slvec0 */ 1358 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 1359 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1360 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 1361 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 1362 1363 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1364 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1365 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1366 1367 /* supperdiagonal part */ 1368 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr); 1369 PetscFunctionReturn(0); 1370 } 1371 1372 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz) 1373 { 1374 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1375 PetscErrorCode ierr; 1376 1377 PetscFunctionBegin; 1378 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1379 /* do diagonal part */ 1380 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1381 /* do supperdiagonal part */ 1382 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1383 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1384 1385 /* do subdiagonal part */ 1386 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1387 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1388 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1389 PetscFunctionReturn(0); 1390 } 1391 1392 /* 1393 This only works correctly for square matrices where the subblock A->A is the 1394 diagonal block 1395 */ 1396 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v) 1397 { 1398 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1399 PetscErrorCode ierr; 1400 1401 PetscFunctionBegin; 1402 /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */ 1403 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1404 PetscFunctionReturn(0); 1405 } 1406 1407 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa) 1408 { 1409 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1410 PetscErrorCode ierr; 1411 1412 PetscFunctionBegin; 1413 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1414 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1415 PetscFunctionReturn(0); 1416 } 1417 1418 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1419 { 1420 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 1421 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1422 PetscErrorCode ierr; 1423 PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1424 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend; 1425 PetscInt *cmap,*idx_p,cstart = mat->rstartbs; 1426 1427 PetscFunctionBegin; 1428 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1429 mat->getrowactive = PETSC_TRUE; 1430 1431 if (!mat->rowvalues && (idx || v)) { 1432 /* 1433 allocate enough space to hold information from the longest row. 1434 */ 1435 Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data; 1436 Mat_SeqBAIJ *Ba = (Mat_SeqBAIJ*)mat->B->data; 1437 PetscInt max = 1,mbs = mat->mbs,tmp; 1438 for (i=0; i<mbs; i++) { 1439 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */ 1440 if (max < tmp) max = tmp; 1441 } 1442 ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr); 1443 } 1444 1445 if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows"); 1446 lrow = row - brstart; /* local row index */ 1447 1448 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1449 if (!v) {pvA = 0; pvB = 0;} 1450 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1451 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1452 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1453 nztot = nzA + nzB; 1454 1455 cmap = mat->garray; 1456 if (v || idx) { 1457 if (nztot) { 1458 /* Sort by increasing column numbers, assuming A and B already sorted */ 1459 PetscInt imark = -1; 1460 if (v) { 1461 *v = v_p = mat->rowvalues; 1462 for (i=0; i<nzB; i++) { 1463 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1464 else break; 1465 } 1466 imark = i; 1467 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1468 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1469 } 1470 if (idx) { 1471 *idx = idx_p = mat->rowindices; 1472 if (imark > -1) { 1473 for (i=0; i<imark; i++) { 1474 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1475 } 1476 } else { 1477 for (i=0; i<nzB; i++) { 1478 if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1479 else break; 1480 } 1481 imark = i; 1482 } 1483 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1484 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1485 } 1486 } else { 1487 if (idx) *idx = 0; 1488 if (v) *v = 0; 1489 } 1490 } 1491 *nz = nztot; 1492 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1493 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1494 PetscFunctionReturn(0); 1495 } 1496 1497 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1498 { 1499 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1500 1501 PetscFunctionBegin; 1502 if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1503 baij->getrowactive = PETSC_FALSE; 1504 PetscFunctionReturn(0); 1505 } 1506 1507 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A) 1508 { 1509 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1510 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1511 1512 PetscFunctionBegin; 1513 aA->getrow_utriangular = PETSC_TRUE; 1514 PetscFunctionReturn(0); 1515 } 1516 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A) 1517 { 1518 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1519 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1520 1521 PetscFunctionBegin; 1522 aA->getrow_utriangular = PETSC_FALSE; 1523 PetscFunctionReturn(0); 1524 } 1525 1526 PetscErrorCode MatRealPart_MPISBAIJ(Mat A) 1527 { 1528 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1529 PetscErrorCode ierr; 1530 1531 PetscFunctionBegin; 1532 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1533 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1534 PetscFunctionReturn(0); 1535 } 1536 1537 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A) 1538 { 1539 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1540 PetscErrorCode ierr; 1541 1542 PetscFunctionBegin; 1543 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1544 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1545 PetscFunctionReturn(0); 1546 } 1547 1548 /* Check if isrow is a subset of iscol_local, called by MatCreateSubMatrix_MPISBAIJ() 1549 Input: isrow - distributed(parallel), 1550 iscol_local - locally owned (seq) 1551 */ 1552 PetscErrorCode ISEqual_private(IS isrow,IS iscol_local,PetscBool *flg) 1553 { 1554 PetscErrorCode ierr; 1555 PetscInt sz1,sz2,*a1,*a2,i,j,k,nmatch; 1556 const PetscInt *ptr1,*ptr2; 1557 1558 PetscFunctionBegin; 1559 ierr = ISGetLocalSize(isrow,&sz1);CHKERRQ(ierr); 1560 ierr = ISGetLocalSize(iscol_local,&sz2);CHKERRQ(ierr); 1561 if (sz1 > sz2) { 1562 *flg = PETSC_FALSE; 1563 PetscFunctionReturn(0); 1564 } 1565 1566 ierr = ISGetIndices(isrow,&ptr1);CHKERRQ(ierr); 1567 ierr = ISGetIndices(iscol_local,&ptr2);CHKERRQ(ierr); 1568 1569 ierr = PetscMalloc1(sz1,&a1);CHKERRQ(ierr); 1570 ierr = PetscMalloc1(sz2,&a2);CHKERRQ(ierr); 1571 ierr = PetscMemcpy(a1,ptr1,sz1*sizeof(PetscInt));CHKERRQ(ierr); 1572 ierr = PetscMemcpy(a2,ptr2,sz2*sizeof(PetscInt));CHKERRQ(ierr); 1573 ierr = PetscSortInt(sz1,a1);CHKERRQ(ierr); 1574 ierr = PetscSortInt(sz2,a2);CHKERRQ(ierr); 1575 1576 nmatch=0; 1577 k = 0; 1578 for (i=0; i<sz1; i++){ 1579 for (j=k; j<sz2; j++){ 1580 if (a1[i] == a2[j]) { 1581 k = j; nmatch++; 1582 break; 1583 } 1584 } 1585 } 1586 ierr = ISRestoreIndices(isrow,&ptr1);CHKERRQ(ierr); 1587 ierr = ISRestoreIndices(iscol_local,&ptr2);CHKERRQ(ierr); 1588 ierr = PetscFree(a1);CHKERRQ(ierr); 1589 ierr = PetscFree(a2);CHKERRQ(ierr); 1590 if (nmatch < sz1) { 1591 *flg = PETSC_FALSE; 1592 } else { 1593 *flg = PETSC_TRUE; 1594 } 1595 PetscFunctionReturn(0); 1596 } 1597 1598 PetscErrorCode MatCreateSubMatrix_MPISBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 1599 { 1600 PetscErrorCode ierr; 1601 IS iscol_local; 1602 PetscInt csize; 1603 PetscBool isequal; 1604 1605 PetscFunctionBegin; 1606 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1607 if (call == MAT_REUSE_MATRIX) { 1608 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 1609 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 1610 } else { 1611 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 1612 ierr = ISEqual_private(isrow,iscol_local,&isequal);CHKERRQ(ierr); 1613 if (!isequal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isrow"); 1614 } 1615 1616 /* now call MatCreateSubMatrix_MPIBAIJ() */ 1617 ierr = MatCreateSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 1618 if (call == MAT_INITIAL_MATRIX) { 1619 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 1620 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 1621 } 1622 PetscFunctionReturn(0); 1623 } 1624 1625 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A) 1626 { 1627 Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data; 1628 PetscErrorCode ierr; 1629 1630 PetscFunctionBegin; 1631 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1632 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1633 PetscFunctionReturn(0); 1634 } 1635 1636 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1637 { 1638 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data; 1639 Mat A = a->A,B = a->B; 1640 PetscErrorCode ierr; 1641 PetscReal isend[5],irecv[5]; 1642 1643 PetscFunctionBegin; 1644 info->block_size = (PetscReal)matin->rmap->bs; 1645 1646 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1647 1648 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1649 isend[3] = info->memory; isend[4] = info->mallocs; 1650 1651 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1652 1653 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1654 isend[3] += info->memory; isend[4] += info->mallocs; 1655 if (flag == MAT_LOCAL) { 1656 info->nz_used = isend[0]; 1657 info->nz_allocated = isend[1]; 1658 info->nz_unneeded = isend[2]; 1659 info->memory = isend[3]; 1660 info->mallocs = isend[4]; 1661 } else if (flag == MAT_GLOBAL_MAX) { 1662 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1663 1664 info->nz_used = irecv[0]; 1665 info->nz_allocated = irecv[1]; 1666 info->nz_unneeded = irecv[2]; 1667 info->memory = irecv[3]; 1668 info->mallocs = irecv[4]; 1669 } else if (flag == MAT_GLOBAL_SUM) { 1670 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1671 1672 info->nz_used = irecv[0]; 1673 info->nz_allocated = irecv[1]; 1674 info->nz_unneeded = irecv[2]; 1675 info->memory = irecv[3]; 1676 info->mallocs = irecv[4]; 1677 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1678 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1679 info->fill_ratio_needed = 0; 1680 info->factor_mallocs = 0; 1681 PetscFunctionReturn(0); 1682 } 1683 1684 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg) 1685 { 1686 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1687 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1688 PetscErrorCode ierr; 1689 1690 PetscFunctionBegin; 1691 switch (op) { 1692 case MAT_NEW_NONZERO_LOCATIONS: 1693 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1694 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1695 case MAT_KEEP_NONZERO_PATTERN: 1696 case MAT_SUBMAT_SINGLEIS: 1697 case MAT_NEW_NONZERO_LOCATION_ERR: 1698 MatCheckPreallocated(A,1); 1699 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1700 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1701 break; 1702 case MAT_ROW_ORIENTED: 1703 MatCheckPreallocated(A,1); 1704 a->roworiented = flg; 1705 1706 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1707 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1708 break; 1709 case MAT_NEW_DIAGONALS: 1710 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1711 break; 1712 case MAT_IGNORE_OFF_PROC_ENTRIES: 1713 a->donotstash = flg; 1714 break; 1715 case MAT_USE_HASH_TABLE: 1716 a->ht_flag = flg; 1717 break; 1718 case MAT_HERMITIAN: 1719 MatCheckPreallocated(A,1); 1720 if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first"); 1721 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1722 #if defined(PETSC_USE_COMPLEX) 1723 A->ops->mult = MatMult_MPISBAIJ_Hermitian; 1724 #endif 1725 break; 1726 case MAT_SPD: 1727 A->spd_set = PETSC_TRUE; 1728 A->spd = flg; 1729 if (flg) { 1730 A->symmetric = PETSC_TRUE; 1731 A->structurally_symmetric = PETSC_TRUE; 1732 A->symmetric_set = PETSC_TRUE; 1733 A->structurally_symmetric_set = PETSC_TRUE; 1734 } 1735 break; 1736 case MAT_SYMMETRIC: 1737 MatCheckPreallocated(A,1); 1738 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1739 break; 1740 case MAT_STRUCTURALLY_SYMMETRIC: 1741 MatCheckPreallocated(A,1); 1742 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1743 break; 1744 case MAT_SYMMETRY_ETERNAL: 1745 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric"); 1746 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1747 break; 1748 case MAT_IGNORE_LOWER_TRIANGULAR: 1749 aA->ignore_ltriangular = flg; 1750 break; 1751 case MAT_ERROR_LOWER_TRIANGULAR: 1752 aA->ignore_ltriangular = flg; 1753 break; 1754 case MAT_GETROW_UPPERTRIANGULAR: 1755 aA->getrow_utriangular = flg; 1756 break; 1757 default: 1758 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1759 } 1760 PetscFunctionReturn(0); 1761 } 1762 1763 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B) 1764 { 1765 PetscErrorCode ierr; 1766 1767 PetscFunctionBegin; 1768 if (reuse == MAT_INITIAL_MATRIX) { 1769 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 1770 } else if (reuse == MAT_REUSE_MATRIX) { 1771 ierr = MatCopy(A,*B,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1772 } 1773 PetscFunctionReturn(0); 1774 } 1775 1776 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr) 1777 { 1778 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1779 Mat a = baij->A, b=baij->B; 1780 PetscErrorCode ierr; 1781 PetscInt nv,m,n; 1782 PetscBool flg; 1783 1784 PetscFunctionBegin; 1785 if (ll != rr) { 1786 ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr); 1787 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n"); 1788 } 1789 if (!ll) PetscFunctionReturn(0); 1790 1791 ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr); 1792 if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n); 1793 1794 ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr); 1795 if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size"); 1796 1797 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1798 1799 /* left diagonalscale the off-diagonal part */ 1800 ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr); 1801 1802 /* scale the diagonal part */ 1803 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1804 1805 /* right diagonalscale the off-diagonal part */ 1806 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1807 ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr); 1808 PetscFunctionReturn(0); 1809 } 1810 1811 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A) 1812 { 1813 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1814 PetscErrorCode ierr; 1815 1816 PetscFunctionBegin; 1817 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1818 PetscFunctionReturn(0); 1819 } 1820 1821 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*); 1822 1823 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool *flag) 1824 { 1825 Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data; 1826 Mat a,b,c,d; 1827 PetscBool flg; 1828 PetscErrorCode ierr; 1829 1830 PetscFunctionBegin; 1831 a = matA->A; b = matA->B; 1832 c = matB->A; d = matB->B; 1833 1834 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1835 if (flg) { 1836 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1837 } 1838 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1839 PetscFunctionReturn(0); 1840 } 1841 1842 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str) 1843 { 1844 PetscErrorCode ierr; 1845 PetscBool isbaij; 1846 1847 PetscFunctionBegin; 1848 ierr = PetscObjectTypeCompareAny((PetscObject)B,&isbaij,MATSEQSBAIJ,MATMPISBAIJ,"");CHKERRQ(ierr); 1849 if (!isbaij) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"Not for matrix type %s",((PetscObject)B)->type_name); 1850 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1851 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1852 ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr); 1853 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1854 ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr); 1855 } else { 1856 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1857 Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)B->data; 1858 1859 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1860 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1861 } 1862 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 1863 PetscFunctionReturn(0); 1864 } 1865 1866 PetscErrorCode MatSetUp_MPISBAIJ(Mat A) 1867 { 1868 PetscErrorCode ierr; 1869 1870 PetscFunctionBegin; 1871 ierr = MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1872 PetscFunctionReturn(0); 1873 } 1874 1875 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1876 { 1877 PetscErrorCode ierr; 1878 Mat_MPISBAIJ *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data; 1879 PetscBLASInt bnz,one=1; 1880 Mat_SeqSBAIJ *xa,*ya; 1881 Mat_SeqBAIJ *xb,*yb; 1882 1883 PetscFunctionBegin; 1884 if (str == SAME_NONZERO_PATTERN) { 1885 PetscScalar alpha = a; 1886 xa = (Mat_SeqSBAIJ*)xx->A->data; 1887 ya = (Mat_SeqSBAIJ*)yy->A->data; 1888 ierr = PetscBLASIntCast(xa->nz,&bnz);CHKERRQ(ierr); 1889 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one)); 1890 xb = (Mat_SeqBAIJ*)xx->B->data; 1891 yb = (Mat_SeqBAIJ*)yy->B->data; 1892 ierr = PetscBLASIntCast(xb->nz,&bnz);CHKERRQ(ierr); 1893 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one)); 1894 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 1895 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1896 ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 1897 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1898 ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr); 1899 } else { 1900 Mat B; 1901 PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs; 1902 if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size"); 1903 ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr); 1904 ierr = MatGetRowUpperTriangular(Y);CHKERRQ(ierr); 1905 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 1906 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 1907 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 1908 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 1909 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 1910 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 1911 ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr); 1912 ierr = MatAXPYGetPreallocation_SeqSBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 1913 ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 1914 ierr = MatMPISBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 1915 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 1916 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 1917 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 1918 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 1919 ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr); 1920 ierr = MatRestoreRowUpperTriangular(Y);CHKERRQ(ierr); 1921 } 1922 PetscFunctionReturn(0); 1923 } 1924 1925 PetscErrorCode MatCreateSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[]) 1926 { 1927 PetscErrorCode ierr; 1928 PetscInt i; 1929 PetscBool flg; 1930 1931 PetscFunctionBegin; 1932 ierr = MatCreateSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr); /* B[] are sbaij matrices */ 1933 for (i=0; i<n; i++) { 1934 ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr); 1935 if (!flg) { 1936 ierr = MatSeqSBAIJZeroOps_Private(*B[i]);CHKERRQ(ierr); 1937 } 1938 } 1939 PetscFunctionReturn(0); 1940 } 1941 1942 PetscErrorCode MatShift_MPISBAIJ(Mat Y,PetscScalar a) 1943 { 1944 PetscErrorCode ierr; 1945 Mat_MPISBAIJ *maij = (Mat_MPISBAIJ*)Y->data; 1946 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ*)maij->A->data; 1947 1948 PetscFunctionBegin; 1949 if (!Y->preallocated) { 1950 ierr = MatMPISBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);CHKERRQ(ierr); 1951 } else if (!aij->nz) { 1952 PetscInt nonew = aij->nonew; 1953 ierr = MatSeqSBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL);CHKERRQ(ierr); 1954 aij->nonew = nonew; 1955 } 1956 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 1957 PetscFunctionReturn(0); 1958 } 1959 1960 PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A,PetscBool *missing,PetscInt *d) 1961 { 1962 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1963 PetscErrorCode ierr; 1964 1965 PetscFunctionBegin; 1966 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 1967 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 1968 if (d) { 1969 PetscInt rstart; 1970 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 1971 *d += rstart/A->rmap->bs; 1972 1973 } 1974 PetscFunctionReturn(0); 1975 } 1976 1977 PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a) 1978 { 1979 PetscFunctionBegin; 1980 *a = ((Mat_MPISBAIJ*)A->data)->A; 1981 PetscFunctionReturn(0); 1982 } 1983 1984 /* -------------------------------------------------------------------*/ 1985 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ, 1986 MatGetRow_MPISBAIJ, 1987 MatRestoreRow_MPISBAIJ, 1988 MatMult_MPISBAIJ, 1989 /* 4*/ MatMultAdd_MPISBAIJ, 1990 MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */ 1991 MatMultAdd_MPISBAIJ, 1992 0, 1993 0, 1994 0, 1995 /* 10*/ 0, 1996 0, 1997 0, 1998 MatSOR_MPISBAIJ, 1999 MatTranspose_MPISBAIJ, 2000 /* 15*/ MatGetInfo_MPISBAIJ, 2001 MatEqual_MPISBAIJ, 2002 MatGetDiagonal_MPISBAIJ, 2003 MatDiagonalScale_MPISBAIJ, 2004 MatNorm_MPISBAIJ, 2005 /* 20*/ MatAssemblyBegin_MPISBAIJ, 2006 MatAssemblyEnd_MPISBAIJ, 2007 MatSetOption_MPISBAIJ, 2008 MatZeroEntries_MPISBAIJ, 2009 /* 24*/ 0, 2010 0, 2011 0, 2012 0, 2013 0, 2014 /* 29*/ MatSetUp_MPISBAIJ, 2015 0, 2016 0, 2017 MatGetDiagonalBlock_MPISBAIJ, 2018 0, 2019 /* 34*/ MatDuplicate_MPISBAIJ, 2020 0, 2021 0, 2022 0, 2023 0, 2024 /* 39*/ MatAXPY_MPISBAIJ, 2025 MatCreateSubMatrices_MPISBAIJ, 2026 MatIncreaseOverlap_MPISBAIJ, 2027 MatGetValues_MPISBAIJ, 2028 MatCopy_MPISBAIJ, 2029 /* 44*/ 0, 2030 MatScale_MPISBAIJ, 2031 MatShift_MPISBAIJ, 2032 0, 2033 0, 2034 /* 49*/ 0, 2035 0, 2036 0, 2037 0, 2038 0, 2039 /* 54*/ 0, 2040 0, 2041 MatSetUnfactored_MPISBAIJ, 2042 0, 2043 MatSetValuesBlocked_MPISBAIJ, 2044 /* 59*/ MatCreateSubMatrix_MPISBAIJ, 2045 0, 2046 0, 2047 0, 2048 0, 2049 /* 64*/ 0, 2050 0, 2051 0, 2052 0, 2053 0, 2054 /* 69*/ MatGetRowMaxAbs_MPISBAIJ, 2055 0, 2056 0, 2057 0, 2058 0, 2059 /* 74*/ 0, 2060 0, 2061 0, 2062 0, 2063 0, 2064 /* 79*/ 0, 2065 0, 2066 0, 2067 0, 2068 MatLoad_MPISBAIJ, 2069 /* 84*/ 0, 2070 0, 2071 0, 2072 0, 2073 0, 2074 /* 89*/ 0, 2075 0, 2076 0, 2077 0, 2078 0, 2079 /* 94*/ 0, 2080 0, 2081 0, 2082 0, 2083 0, 2084 /* 99*/ 0, 2085 0, 2086 0, 2087 0, 2088 0, 2089 /*104*/ 0, 2090 MatRealPart_MPISBAIJ, 2091 MatImaginaryPart_MPISBAIJ, 2092 MatGetRowUpperTriangular_MPISBAIJ, 2093 MatRestoreRowUpperTriangular_MPISBAIJ, 2094 /*109*/ 0, 2095 0, 2096 0, 2097 0, 2098 MatMissingDiagonal_MPISBAIJ, 2099 /*114*/ 0, 2100 0, 2101 0, 2102 0, 2103 0, 2104 /*119*/ 0, 2105 0, 2106 0, 2107 0, 2108 0, 2109 /*124*/ 0, 2110 0, 2111 0, 2112 0, 2113 0, 2114 /*129*/ 0, 2115 0, 2116 0, 2117 0, 2118 0, 2119 /*134*/ 0, 2120 0, 2121 0, 2122 0, 2123 0, 2124 /*139*/ MatSetBlockSizes_Default, 2125 0, 2126 0, 2127 0, 2128 0, 2129 /*144*/MatCreateMPIMatConcatenateSeqMat_MPISBAIJ 2130 }; 2131 2132 PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz) 2133 { 2134 Mat_MPISBAIJ *b; 2135 PetscErrorCode ierr; 2136 PetscInt i,mbs,Mbs; 2137 PetscMPIInt size; 2138 2139 PetscFunctionBegin; 2140 ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 2141 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2142 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2143 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2144 2145 b = (Mat_MPISBAIJ*)B->data; 2146 mbs = B->rmap->n/bs; 2147 Mbs = B->rmap->N/bs; 2148 if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->N,bs); 2149 2150 B->rmap->bs = bs; 2151 b->bs2 = bs*bs; 2152 b->mbs = mbs; 2153 b->Mbs = Mbs; 2154 b->nbs = B->cmap->n/bs; 2155 b->Nbs = B->cmap->N/bs; 2156 2157 for (i=0; i<=b->size; i++) { 2158 b->rangebs[i] = B->rmap->range[i]/bs; 2159 } 2160 b->rstartbs = B->rmap->rstart/bs; 2161 b->rendbs = B->rmap->rend/bs; 2162 2163 b->cstartbs = B->cmap->rstart/bs; 2164 b->cendbs = B->cmap->rend/bs; 2165 2166 #if defined(PETSC_USE_CTABLE) 2167 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2168 #else 2169 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2170 #endif 2171 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2172 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2173 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2174 ierr = VecDestroy(&b->slvec0);CHKERRQ(ierr); 2175 ierr = VecDestroy(&b->slvec0b);CHKERRQ(ierr); 2176 ierr = VecDestroy(&b->slvec1);CHKERRQ(ierr); 2177 ierr = VecDestroy(&b->slvec1a);CHKERRQ(ierr); 2178 ierr = VecDestroy(&b->slvec1b);CHKERRQ(ierr); 2179 ierr = VecScatterDestroy(&b->sMvctx);CHKERRQ(ierr); 2180 2181 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2182 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 2183 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2184 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2185 ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr); 2186 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 2187 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2188 2189 if (!B->preallocated) { 2190 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2191 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2192 ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr); 2193 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2194 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr); 2195 } 2196 2197 ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2198 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 2199 2200 B->preallocated = PETSC_TRUE; 2201 B->was_assembled = PETSC_FALSE; 2202 B->assembled = PETSC_FALSE; 2203 PetscFunctionReturn(0); 2204 } 2205 2206 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2207 { 2208 PetscInt m,rstart,cstart,cend; 2209 PetscInt i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0; 2210 const PetscInt *JJ =0; 2211 PetscScalar *values=0; 2212 PetscErrorCode ierr; 2213 2214 PetscFunctionBegin; 2215 if (bs < 1) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 2216 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2217 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2218 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2219 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2220 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2221 m = B->rmap->n/bs; 2222 rstart = B->rmap->rstart/bs; 2223 cstart = B->cmap->rstart/bs; 2224 cend = B->cmap->rend/bs; 2225 2226 if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 2227 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 2228 for (i=0; i<m; i++) { 2229 nz = ii[i+1] - ii[i]; 2230 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 2231 nz_max = PetscMax(nz_max,nz); 2232 JJ = jj + ii[i]; 2233 for (j=0; j<nz; j++) { 2234 if (*JJ >= cstart) break; 2235 JJ++; 2236 } 2237 d = 0; 2238 for (; j<nz; j++) { 2239 if (*JJ++ >= cend) break; 2240 d++; 2241 } 2242 d_nnz[i] = d; 2243 o_nnz[i] = nz - d; 2244 } 2245 ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2246 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 2247 2248 values = (PetscScalar*)V; 2249 if (!values) { 2250 ierr = PetscMalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr); 2251 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2252 } 2253 for (i=0; i<m; i++) { 2254 PetscInt row = i + rstart; 2255 PetscInt ncols = ii[i+1] - ii[i]; 2256 const PetscInt *icols = jj + ii[i]; 2257 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2258 ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2259 } 2260 2261 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2262 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2263 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2264 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2265 PetscFunctionReturn(0); 2266 } 2267 2268 /*MC 2269 MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices, 2270 based on block compressed sparse row format. Only the upper triangular portion of the "diagonal" portion of 2271 the matrix is stored. 2272 2273 For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you 2274 can call MatSetOption(Mat, MAT_HERMITIAN); 2275 2276 Options Database Keys: 2277 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions() 2278 2279 Level: beginner 2280 2281 .seealso: MatCreateMPISBAIJ 2282 M*/ 2283 2284 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B) 2285 { 2286 Mat_MPISBAIJ *b; 2287 PetscErrorCode ierr; 2288 PetscBool flg = PETSC_FALSE; 2289 2290 PetscFunctionBegin; 2291 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 2292 B->data = (void*)b; 2293 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2294 2295 B->ops->destroy = MatDestroy_MPISBAIJ; 2296 B->ops->view = MatView_MPISBAIJ; 2297 B->assembled = PETSC_FALSE; 2298 B->insertmode = NOT_SET_VALUES; 2299 2300 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 2301 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr); 2302 2303 /* build local table of row and column ownerships */ 2304 ierr = PetscMalloc1(b->size+2,&b->rangebs);CHKERRQ(ierr); 2305 2306 /* build cache for off array entries formed */ 2307 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 2308 2309 b->donotstash = PETSC_FALSE; 2310 b->colmap = NULL; 2311 b->garray = NULL; 2312 b->roworiented = PETSC_TRUE; 2313 2314 /* stuff used in block assembly */ 2315 b->barray = 0; 2316 2317 /* stuff used for matrix vector multiply */ 2318 b->lvec = 0; 2319 b->Mvctx = 0; 2320 b->slvec0 = 0; 2321 b->slvec0b = 0; 2322 b->slvec1 = 0; 2323 b->slvec1a = 0; 2324 b->slvec1b = 0; 2325 b->sMvctx = 0; 2326 2327 /* stuff for MatGetRow() */ 2328 b->rowindices = 0; 2329 b->rowvalues = 0; 2330 b->getrowactive = PETSC_FALSE; 2331 2332 /* hash table stuff */ 2333 b->ht = 0; 2334 b->hd = 0; 2335 b->ht_size = 0; 2336 b->ht_flag = PETSC_FALSE; 2337 b->ht_fact = 0; 2338 b->ht_total_ct = 0; 2339 b->ht_insert_ct = 0; 2340 2341 /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */ 2342 b->ijonly = PETSC_FALSE; 2343 2344 b->in_loc = 0; 2345 b->v_loc = 0; 2346 b->n_loc = 0; 2347 2348 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ);CHKERRQ(ierr); 2349 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr); 2350 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr); 2351 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr); 2352 #if defined(PETSC_HAVE_ELEMENTAL) 2353 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_elemental_C",MatConvert_MPISBAIJ_Elemental);CHKERRQ(ierr); 2354 #endif 2355 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpiaij_C",MatConvert_MPISBAIJ_XAIJ);CHKERRQ(ierr); 2356 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpibaij_C",MatConvert_MPISBAIJ_XAIJ);CHKERRQ(ierr); 2357 2358 B->symmetric = PETSC_TRUE; 2359 B->structurally_symmetric = PETSC_TRUE; 2360 B->symmetric_set = PETSC_TRUE; 2361 B->structurally_symmetric_set = PETSC_TRUE; 2362 B->symmetric_eternal = PETSC_TRUE; 2363 2364 B->hermitian = PETSC_FALSE; 2365 B->hermitian_set = PETSC_FALSE; 2366 2367 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr); 2368 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr); 2369 ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",flg,&flg,NULL);CHKERRQ(ierr); 2370 if (flg) { 2371 PetscReal fact = 1.39; 2372 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 2373 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr); 2374 if (fact <= 1.0) fact = 1.39; 2375 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 2376 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 2377 } 2378 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2379 PetscFunctionReturn(0); 2380 } 2381 2382 /*MC 2383 MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices. 2384 2385 This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator, 2386 and MATMPISBAIJ otherwise. 2387 2388 Options Database Keys: 2389 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions() 2390 2391 Level: beginner 2392 2393 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ 2394 M*/ 2395 2396 /*@C 2397 MatMPISBAIJSetPreallocation - For good matrix assembly performance 2398 the user should preallocate the matrix storage by setting the parameters 2399 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2400 performance can be increased by more than a factor of 50. 2401 2402 Collective on Mat 2403 2404 Input Parameters: 2405 + B - the matrix 2406 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 2407 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 2408 . d_nz - number of block nonzeros per block row in diagonal portion of local 2409 submatrix (same for all local rows) 2410 . d_nnz - array containing the number of block nonzeros in the various block rows 2411 in the upper triangular and diagonal part of the in diagonal portion of the local 2412 (possibly different for each block row) or NULL. If you plan to factor the matrix you must leave room 2413 for the diagonal entry and set a value even if it is zero. 2414 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2415 submatrix (same for all local rows). 2416 - o_nnz - array containing the number of nonzeros in the various block rows of the 2417 off-diagonal portion of the local submatrix that is right of the diagonal 2418 (possibly different for each block row) or NULL. 2419 2420 2421 Options Database Keys: 2422 . -mat_no_unroll - uses code that does not unroll the loops in the 2423 block calculations (much slower) 2424 . -mat_block_size - size of the blocks to use 2425 2426 Notes: 2427 2428 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2429 than it must be used on all processors that share the object for that argument. 2430 2431 If the *_nnz parameter is given then the *_nz parameter is ignored 2432 2433 Storage Information: 2434 For a square global matrix we define each processor's diagonal portion 2435 to be its local rows and the corresponding columns (a square submatrix); 2436 each processor's off-diagonal portion encompasses the remainder of the 2437 local matrix (a rectangular submatrix). 2438 2439 The user can specify preallocated storage for the diagonal part of 2440 the local submatrix with either d_nz or d_nnz (not both). Set 2441 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 2442 memory allocation. Likewise, specify preallocated storage for the 2443 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2444 2445 You can call MatGetInfo() to get information on how effective the preallocation was; 2446 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 2447 You can also run with the option -info and look for messages with the string 2448 malloc in them to see if additional memory allocation was needed. 2449 2450 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2451 the figure below we depict these three local rows and all columns (0-11). 2452 2453 .vb 2454 0 1 2 3 4 5 6 7 8 9 10 11 2455 -------------------------- 2456 row 3 |. . . d d d o o o o o o 2457 row 4 |. . . d d d o o o o o o 2458 row 5 |. . . d d d o o o o o o 2459 -------------------------- 2460 .ve 2461 2462 Thus, any entries in the d locations are stored in the d (diagonal) 2463 submatrix, and any entries in the o locations are stored in the 2464 o (off-diagonal) submatrix. Note that the d matrix is stored in 2465 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 2466 2467 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 2468 plus the diagonal part of the d matrix, 2469 and o_nz should indicate the number of block nonzeros per row in the o matrix 2470 2471 In general, for PDE problems in which most nonzeros are near the diagonal, 2472 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2473 or you will get TERRIBLE performance; see the users' manual chapter on 2474 matrices. 2475 2476 Level: intermediate 2477 2478 .keywords: matrix, block, aij, compressed row, sparse, parallel 2479 2480 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership() 2481 @*/ 2482 PetscErrorCode MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2483 { 2484 PetscErrorCode ierr; 2485 2486 PetscFunctionBegin; 2487 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2488 PetscValidType(B,1); 2489 PetscValidLogicalCollectiveInt(B,bs,2); 2490 ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 2491 PetscFunctionReturn(0); 2492 } 2493 2494 /*@C 2495 MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format 2496 (block compressed row). For good matrix assembly performance 2497 the user should preallocate the matrix storage by setting the parameters 2498 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2499 performance can be increased by more than a factor of 50. 2500 2501 Collective on MPI_Comm 2502 2503 Input Parameters: 2504 + comm - MPI communicator 2505 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 2506 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 2507 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2508 This value should be the same as the local size used in creating the 2509 y vector for the matrix-vector product y = Ax. 2510 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 2511 This value should be the same as the local size used in creating the 2512 x vector for the matrix-vector product y = Ax. 2513 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2514 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2515 . d_nz - number of block nonzeros per block row in diagonal portion of local 2516 submatrix (same for all local rows) 2517 . d_nnz - array containing the number of block nonzeros in the various block rows 2518 in the upper triangular portion of the in diagonal portion of the local 2519 (possibly different for each block block row) or NULL. 2520 If you plan to factor the matrix you must leave room for the diagonal entry and 2521 set its value even if it is zero. 2522 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2523 submatrix (same for all local rows). 2524 - o_nnz - array containing the number of nonzeros in the various block rows of the 2525 off-diagonal portion of the local submatrix (possibly different for 2526 each block row) or NULL. 2527 2528 Output Parameter: 2529 . A - the matrix 2530 2531 Options Database Keys: 2532 . -mat_no_unroll - uses code that does not unroll the loops in the 2533 block calculations (much slower) 2534 . -mat_block_size - size of the blocks to use 2535 . -mat_mpi - use the parallel matrix data structures even on one processor 2536 (defaults to using SeqBAIJ format on one processor) 2537 2538 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 2539 MatXXXXSetPreallocation() paradgm instead of this routine directly. 2540 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 2541 2542 Notes: 2543 The number of rows and columns must be divisible by blocksize. 2544 This matrix type does not support complex Hermitian operation. 2545 2546 The user MUST specify either the local or global matrix dimensions 2547 (possibly both). 2548 2549 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2550 than it must be used on all processors that share the object for that argument. 2551 2552 If the *_nnz parameter is given then the *_nz parameter is ignored 2553 2554 Storage Information: 2555 For a square global matrix we define each processor's diagonal portion 2556 to be its local rows and the corresponding columns (a square submatrix); 2557 each processor's off-diagonal portion encompasses the remainder of the 2558 local matrix (a rectangular submatrix). 2559 2560 The user can specify preallocated storage for the diagonal part of 2561 the local submatrix with either d_nz or d_nnz (not both). Set 2562 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 2563 memory allocation. Likewise, specify preallocated storage for the 2564 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2565 2566 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2567 the figure below we depict these three local rows and all columns (0-11). 2568 2569 .vb 2570 0 1 2 3 4 5 6 7 8 9 10 11 2571 -------------------------- 2572 row 3 |. . . d d d o o o o o o 2573 row 4 |. . . d d d o o o o o o 2574 row 5 |. . . d d d o o o o o o 2575 -------------------------- 2576 .ve 2577 2578 Thus, any entries in the d locations are stored in the d (diagonal) 2579 submatrix, and any entries in the o locations are stored in the 2580 o (off-diagonal) submatrix. Note that the d matrix is stored in 2581 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 2582 2583 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 2584 plus the diagonal part of the d matrix, 2585 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2586 In general, for PDE problems in which most nonzeros are near the diagonal, 2587 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2588 or you will get TERRIBLE performance; see the users' manual chapter on 2589 matrices. 2590 2591 Level: intermediate 2592 2593 .keywords: matrix, block, aij, compressed row, sparse, parallel 2594 2595 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ() 2596 @*/ 2597 2598 PetscErrorCode MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2599 { 2600 PetscErrorCode ierr; 2601 PetscMPIInt size; 2602 2603 PetscFunctionBegin; 2604 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2605 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2606 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2607 if (size > 1) { 2608 ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr); 2609 ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2610 } else { 2611 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 2612 ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2613 } 2614 PetscFunctionReturn(0); 2615 } 2616 2617 2618 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2619 { 2620 Mat mat; 2621 Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data; 2622 PetscErrorCode ierr; 2623 PetscInt len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs; 2624 PetscScalar *array; 2625 2626 PetscFunctionBegin; 2627 *newmat = 0; 2628 2629 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2630 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2631 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2632 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2633 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2634 2635 mat->factortype = matin->factortype; 2636 mat->preallocated = PETSC_TRUE; 2637 mat->assembled = PETSC_TRUE; 2638 mat->insertmode = NOT_SET_VALUES; 2639 2640 a = (Mat_MPISBAIJ*)mat->data; 2641 a->bs2 = oldmat->bs2; 2642 a->mbs = oldmat->mbs; 2643 a->nbs = oldmat->nbs; 2644 a->Mbs = oldmat->Mbs; 2645 a->Nbs = oldmat->Nbs; 2646 2647 a->size = oldmat->size; 2648 a->rank = oldmat->rank; 2649 a->donotstash = oldmat->donotstash; 2650 a->roworiented = oldmat->roworiented; 2651 a->rowindices = 0; 2652 a->rowvalues = 0; 2653 a->getrowactive = PETSC_FALSE; 2654 a->barray = 0; 2655 a->rstartbs = oldmat->rstartbs; 2656 a->rendbs = oldmat->rendbs; 2657 a->cstartbs = oldmat->cstartbs; 2658 a->cendbs = oldmat->cendbs; 2659 2660 /* hash table stuff */ 2661 a->ht = 0; 2662 a->hd = 0; 2663 a->ht_size = 0; 2664 a->ht_flag = oldmat->ht_flag; 2665 a->ht_fact = oldmat->ht_fact; 2666 a->ht_total_ct = 0; 2667 a->ht_insert_ct = 0; 2668 2669 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr); 2670 if (oldmat->colmap) { 2671 #if defined(PETSC_USE_CTABLE) 2672 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2673 #else 2674 ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr); 2675 ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2676 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2677 #endif 2678 } else a->colmap = 0; 2679 2680 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2681 ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr); 2682 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2683 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 2684 } else a->garray = 0; 2685 2686 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 2687 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2688 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2689 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2690 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2691 2692 ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr); 2693 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr); 2694 ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr); 2695 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr); 2696 2697 ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr); 2698 ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr); 2699 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr); 2700 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr); 2701 ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr); 2702 ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr); 2703 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr); 2704 ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr); 2705 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr); 2706 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr); 2707 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b);CHKERRQ(ierr); 2708 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a);CHKERRQ(ierr); 2709 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b);CHKERRQ(ierr); 2710 2711 /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */ 2712 ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr); 2713 a->sMvctx = oldmat->sMvctx; 2714 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx);CHKERRQ(ierr); 2715 2716 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2717 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2718 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2719 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2720 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2721 *newmat = mat; 2722 PetscFunctionReturn(0); 2723 } 2724 2725 PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer) 2726 { 2727 PetscErrorCode ierr; 2728 PetscInt i,nz,j,rstart,rend; 2729 PetscScalar *vals,*buf; 2730 MPI_Comm comm; 2731 MPI_Status status; 2732 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,mmbs; 2733 PetscInt header[4],*rowlengths = 0,M,N,m,*cols,*locrowlens; 2734 PetscInt *procsnz = 0,jj,*mycols,*ibuf; 2735 PetscInt bs = newmat->rmap->bs,Mbs,mbs,extra_rows; 2736 PetscInt *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2737 PetscInt dcount,kmax,k,nzcount,tmp; 2738 int fd; 2739 PetscBool isbinary; 2740 2741 PetscFunctionBegin; 2742 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 2743 if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)newmat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newmat)->type_name); 2744 2745 /* force binary viewer to load .info file if it has not yet done so */ 2746 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2747 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 2748 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr); 2749 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 2750 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2751 if (bs < 0) bs = 1; 2752 2753 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2754 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2755 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2756 if (!rank) { 2757 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 2758 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2759 if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newmat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ"); 2760 } 2761 2762 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2763 M = header[1]; 2764 N = header[2]; 2765 2766 /* If global sizes are set, check if they are consistent with that given in the file */ 2767 if (newmat->rmap->N >= 0 && newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newmat->rmap->N,M); 2768 if (newmat->cmap->N >= 0 && newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newmat->cmap->N,N); 2769 2770 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 2771 2772 /* 2773 This code adds extra rows to make sure the number of rows is 2774 divisible by the blocksize 2775 */ 2776 Mbs = M/bs; 2777 extra_rows = bs - M + bs*(Mbs); 2778 if (extra_rows == bs) extra_rows = 0; 2779 else Mbs++; 2780 if (extra_rows &&!rank) { 2781 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 2782 } 2783 2784 /* determine ownership of all rows */ 2785 if (newmat->rmap->n < 0) { /* PETSC_DECIDE */ 2786 mbs = Mbs/size + ((Mbs % size) > rank); 2787 m = mbs*bs; 2788 } else { /* User Set */ 2789 m = newmat->rmap->n; 2790 mbs = m/bs; 2791 } 2792 ierr = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr); 2793 ierr = PetscMPIIntCast(mbs,&mmbs);CHKERRQ(ierr); 2794 ierr = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2795 rowners[0] = 0; 2796 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2797 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2798 rstart = rowners[rank]; 2799 rend = rowners[rank+1]; 2800 2801 /* distribute row lengths to all processors */ 2802 ierr = PetscMalloc1((rend-rstart)*bs,&locrowlens);CHKERRQ(ierr); 2803 if (!rank) { 2804 ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr); 2805 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2806 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2807 ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr); 2808 for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2809 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2810 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2811 } else { 2812 ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2813 } 2814 2815 if (!rank) { /* procs[0] */ 2816 /* calculate the number of nonzeros on each processor */ 2817 ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr); 2818 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2819 for (i=0; i<size; i++) { 2820 for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 2821 procsnz[i] += rowlengths[j]; 2822 } 2823 } 2824 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2825 2826 /* determine max buffer needed and allocate it */ 2827 maxnz = 0; 2828 for (i=0; i<size; i++) { 2829 maxnz = PetscMax(maxnz,procsnz[i]); 2830 } 2831 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 2832 2833 /* read in my part of the matrix column indices */ 2834 nz = procsnz[0]; 2835 ierr = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr); 2836 mycols = ibuf; 2837 if (size == 1) nz -= extra_rows; 2838 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2839 if (size == 1) { 2840 for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i; 2841 } 2842 2843 /* read in every ones (except the last) and ship off */ 2844 for (i=1; i<size-1; i++) { 2845 nz = procsnz[i]; 2846 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2847 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2848 } 2849 /* read in the stuff for the last proc */ 2850 if (size != 1) { 2851 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2852 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2853 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2854 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 2855 } 2856 ierr = PetscFree(cols);CHKERRQ(ierr); 2857 } else { /* procs[i], i>0 */ 2858 /* determine buffer space needed for message */ 2859 nz = 0; 2860 for (i=0; i<m; i++) nz += locrowlens[i]; 2861 ierr = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr); 2862 mycols = ibuf; 2863 /* receive message of column indices*/ 2864 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2865 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2866 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2867 } 2868 2869 /* loop over local rows, determining number of off diagonal entries */ 2870 ierr = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr); 2871 ierr = PetscMalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr); 2872 ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2873 ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2874 ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2875 rowcount = 0; 2876 nzcount = 0; 2877 for (i=0; i<mbs; i++) { 2878 dcount = 0; 2879 odcount = 0; 2880 for (j=0; j<bs; j++) { 2881 kmax = locrowlens[rowcount]; 2882 for (k=0; k<kmax; k++) { 2883 tmp = mycols[nzcount++]/bs; /* block col. index */ 2884 if (!mask[tmp]) { 2885 mask[tmp] = 1; 2886 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */ 2887 else masked1[dcount++] = tmp; /* entry in diag portion */ 2888 } 2889 } 2890 rowcount++; 2891 } 2892 2893 dlens[i] = dcount; /* d_nzz[i] */ 2894 odlens[i] = odcount; /* o_nzz[i] */ 2895 2896 /* zero out the mask elements we set */ 2897 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2898 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2899 } 2900 ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 2901 ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr); 2902 ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 2903 2904 if (!rank) { 2905 ierr = PetscMalloc1(maxnz,&buf);CHKERRQ(ierr); 2906 /* read in my part of the matrix numerical values */ 2907 nz = procsnz[0]; 2908 vals = buf; 2909 mycols = ibuf; 2910 if (size == 1) nz -= extra_rows; 2911 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2912 if (size == 1) { 2913 for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0; 2914 } 2915 2916 /* insert into matrix */ 2917 jj = rstart*bs; 2918 for (i=0; i<m; i++) { 2919 ierr = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2920 mycols += locrowlens[i]; 2921 vals += locrowlens[i]; 2922 jj++; 2923 } 2924 2925 /* read in other processors (except the last one) and ship out */ 2926 for (i=1; i<size-1; i++) { 2927 nz = procsnz[i]; 2928 vals = buf; 2929 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2930 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 2931 } 2932 /* the last proc */ 2933 if (size != 1) { 2934 nz = procsnz[i] - extra_rows; 2935 vals = buf; 2936 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2937 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2938 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 2939 } 2940 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2941 2942 } else { 2943 /* receive numeric values */ 2944 ierr = PetscMalloc1(nz,&buf);CHKERRQ(ierr); 2945 2946 /* receive message of values*/ 2947 vals = buf; 2948 mycols = ibuf; 2949 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr); 2950 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2951 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2952 2953 /* insert into matrix */ 2954 jj = rstart*bs; 2955 for (i=0; i<m; i++) { 2956 ierr = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2957 mycols += locrowlens[i]; 2958 vals += locrowlens[i]; 2959 jj++; 2960 } 2961 } 2962 2963 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2964 ierr = PetscFree(buf);CHKERRQ(ierr); 2965 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2966 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 2967 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 2968 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 2969 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2970 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2971 PetscFunctionReturn(0); 2972 } 2973 2974 /*XXXXX@ 2975 MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2976 2977 Input Parameters: 2978 . mat - the matrix 2979 . fact - factor 2980 2981 Not Collective on Mat, each process can have a different hash factor 2982 2983 Level: advanced 2984 2985 Notes: 2986 This can also be set by the command line option: -mat_use_hash_table fact 2987 2988 .keywords: matrix, hashtable, factor, HT 2989 2990 .seealso: MatSetOption() 2991 @XXXXX*/ 2992 2993 2994 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[]) 2995 { 2996 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 2997 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(a->B)->data; 2998 PetscReal atmp; 2999 PetscReal *work,*svalues,*rvalues; 3000 PetscErrorCode ierr; 3001 PetscInt i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol; 3002 PetscMPIInt rank,size; 3003 PetscInt *rowners_bs,dest,count,source; 3004 PetscScalar *va; 3005 MatScalar *ba; 3006 MPI_Status stat; 3007 3008 PetscFunctionBegin; 3009 if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov"); 3010 ierr = MatGetRowMaxAbs(a->A,v,NULL);CHKERRQ(ierr); 3011 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 3012 3013 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 3014 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 3015 3016 bs = A->rmap->bs; 3017 mbs = a->mbs; 3018 Mbs = a->Mbs; 3019 ba = b->a; 3020 bi = b->i; 3021 bj = b->j; 3022 3023 /* find ownerships */ 3024 rowners_bs = A->rmap->range; 3025 3026 /* each proc creates an array to be distributed */ 3027 ierr = PetscMalloc1(bs*Mbs,&work);CHKERRQ(ierr); 3028 ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr); 3029 3030 /* row_max for B */ 3031 if (rank != size-1) { 3032 for (i=0; i<mbs; i++) { 3033 ncols = bi[1] - bi[0]; bi++; 3034 brow = bs*i; 3035 for (j=0; j<ncols; j++) { 3036 bcol = bs*(*bj); 3037 for (kcol=0; kcol<bs; kcol++) { 3038 col = bcol + kcol; /* local col index */ 3039 col += rowners_bs[rank+1]; /* global col index */ 3040 for (krow=0; krow<bs; krow++) { 3041 atmp = PetscAbsScalar(*ba); ba++; 3042 row = brow + krow; /* local row index */ 3043 if (PetscRealPart(va[row]) < atmp) va[row] = atmp; 3044 if (work[col] < atmp) work[col] = atmp; 3045 } 3046 } 3047 bj++; 3048 } 3049 } 3050 3051 /* send values to its owners */ 3052 for (dest=rank+1; dest<size; dest++) { 3053 svalues = work + rowners_bs[dest]; 3054 count = rowners_bs[dest+1]-rowners_bs[dest]; 3055 ierr = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 3056 } 3057 } 3058 3059 /* receive values */ 3060 if (rank) { 3061 rvalues = work; 3062 count = rowners_bs[rank+1]-rowners_bs[rank]; 3063 for (source=0; source<rank; source++) { 3064 ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat);CHKERRQ(ierr); 3065 /* process values */ 3066 for (i=0; i<count; i++) { 3067 if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i]; 3068 } 3069 } 3070 } 3071 3072 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 3073 ierr = PetscFree(work);CHKERRQ(ierr); 3074 PetscFunctionReturn(0); 3075 } 3076 3077 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 3078 { 3079 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 3080 PetscErrorCode ierr; 3081 PetscInt mbs=mat->mbs,bs=matin->rmap->bs; 3082 PetscScalar *x,*ptr,*from; 3083 Vec bb1; 3084 const PetscScalar *b; 3085 3086 PetscFunctionBegin; 3087 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 3088 if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 3089 3090 if (flag == SOR_APPLY_UPPER) { 3091 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 3092 PetscFunctionReturn(0); 3093 } 3094 3095 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 3096 if (flag & SOR_ZERO_INITIAL_GUESS) { 3097 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 3098 its--; 3099 } 3100 3101 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 3102 while (its--) { 3103 3104 /* lower triangular part: slvec0b = - B^T*xx */ 3105 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 3106 3107 /* copy xx into slvec0a */ 3108 ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr); 3109 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 3110 ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 3111 ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr); 3112 3113 ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr); 3114 3115 /* copy bb into slvec1a */ 3116 ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr); 3117 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 3118 ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 3119 ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr); 3120 3121 /* set slvec1b = 0 */ 3122 ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr); 3123 3124 ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3125 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 3126 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 3127 ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3128 3129 /* upper triangular part: bb1 = bb1 - B*x */ 3130 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr); 3131 3132 /* local diagonal sweep */ 3133 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 3134 } 3135 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 3136 } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 3137 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 3138 } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 3139 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 3140 } else if (flag & SOR_EISENSTAT) { 3141 Vec xx1; 3142 PetscBool hasop; 3143 const PetscScalar *diag; 3144 PetscScalar *sl,scale = (omega - 2.0)/omega; 3145 PetscInt i,n; 3146 3147 if (!mat->xx1) { 3148 ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr); 3149 ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr); 3150 } 3151 xx1 = mat->xx1; 3152 bb1 = mat->bb1; 3153 3154 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 3155 3156 if (!mat->diag) { 3157 /* this is wrong for same matrix with new nonzero values */ 3158 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 3159 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 3160 } 3161 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 3162 3163 if (hasop) { 3164 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 3165 ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr); 3166 } else { 3167 /* 3168 These two lines are replaced by code that may be a bit faster for a good compiler 3169 ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr); 3170 ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr); 3171 */ 3172 ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr); 3173 ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr); 3174 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 3175 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 3176 ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr); 3177 if (omega == 1.0) { 3178 for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i]; 3179 ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr); 3180 } else { 3181 for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i]; 3182 ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr); 3183 } 3184 ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr); 3185 ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr); 3186 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 3187 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 3188 } 3189 3190 /* multiply off-diagonal portion of matrix */ 3191 ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr); 3192 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 3193 ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr); 3194 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 3195 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 3196 ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr); 3197 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 3198 ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3199 ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3200 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr); 3201 3202 /* local sweep */ 3203 ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 3204 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 3205 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 3206 PetscFunctionReturn(0); 3207 } 3208 3209 PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 3210 { 3211 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 3212 PetscErrorCode ierr; 3213 Vec lvec1,bb1; 3214 3215 PetscFunctionBegin; 3216 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 3217 if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 3218 3219 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 3220 if (flag & SOR_ZERO_INITIAL_GUESS) { 3221 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 3222 its--; 3223 } 3224 3225 ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr); 3226 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 3227 while (its--) { 3228 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3229 3230 /* lower diagonal part: bb1 = bb - B^T*xx */ 3231 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr); 3232 ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr); 3233 3234 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3235 ierr = VecCopy(bb,bb1);CHKERRQ(ierr); 3236 ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3237 3238 /* upper diagonal part: bb1 = bb1 - B*x */ 3239 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 3240 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 3241 3242 ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3243 3244 /* diagonal sweep */ 3245 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 3246 } 3247 ierr = VecDestroy(&lvec1);CHKERRQ(ierr); 3248 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 3249 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 3250 PetscFunctionReturn(0); 3251 } 3252 3253 /*@ 3254 MatCreateMPISBAIJWithArrays - creates a MPI SBAIJ matrix using arrays that contain in standard 3255 CSR format the local rows. 3256 3257 Collective on MPI_Comm 3258 3259 Input Parameters: 3260 + comm - MPI communicator 3261 . bs - the block size, only a block size of 1 is supported 3262 . m - number of local rows (Cannot be PETSC_DECIDE) 3263 . n - This value should be the same as the local size used in creating the 3264 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3265 calculated if N is given) For square matrices n is almost always m. 3266 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3267 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3268 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that row block row of the matrix 3269 . j - column indices 3270 - a - matrix values 3271 3272 Output Parameter: 3273 . mat - the matrix 3274 3275 Level: intermediate 3276 3277 Notes: 3278 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3279 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3280 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3281 3282 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3283 3284 .keywords: matrix, aij, compressed row, sparse, parallel 3285 3286 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3287 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 3288 @*/ 3289 PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3290 { 3291 PetscErrorCode ierr; 3292 3293 3294 PetscFunctionBegin; 3295 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3296 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3297 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3298 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3299 ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr); 3300 ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr); 3301 PetscFunctionReturn(0); 3302 } 3303 3304 3305 /*@C 3306 MatMPISBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format 3307 (the default parallel PETSc format). 3308 3309 Collective on MPI_Comm 3310 3311 Input Parameters: 3312 + B - the matrix 3313 . bs - the block size 3314 . i - the indices into j for the start of each local row (starts with zero) 3315 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3316 - v - optional values in the matrix 3317 3318 Level: developer 3319 3320 .keywords: matrix, aij, compressed row, sparse, parallel 3321 3322 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ 3323 @*/ 3324 PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3325 { 3326 PetscErrorCode ierr; 3327 3328 PetscFunctionBegin; 3329 ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 3330 PetscFunctionReturn(0); 3331 } 3332 3333 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3334 { 3335 PetscErrorCode ierr; 3336 PetscInt m,N,i,rstart,nnz,Ii,bs,cbs; 3337 PetscInt *indx; 3338 PetscScalar *values; 3339 3340 PetscFunctionBegin; 3341 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 3342 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 3343 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inmat->data; 3344 PetscInt *dnz,*onz,sum,bs,cbs,mbs,Nbs; 3345 PetscInt *bindx,rmax=a->rmax,j; 3346 3347 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 3348 mbs = m/bs; Nbs = N/cbs; 3349 if (n == PETSC_DECIDE) { 3350 ierr = PetscSplitOwnership(comm,&n,&Nbs);CHKERRQ(ierr); 3351 } 3352 /* Check sum(n) = Nbs */ 3353 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3354 if (sum != Nbs) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",Nbs); 3355 3356 ierr = MPI_Scan(&mbs, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3357 rstart -= mbs; 3358 3359 ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr); 3360 ierr = MatPreallocateInitialize(comm,mbs,n,dnz,onz);CHKERRQ(ierr); 3361 ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 3362 for (i=0; i<mbs; i++) { 3363 ierr = MatGetRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */ 3364 nnz = nnz/bs; 3365 for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs; 3366 ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr); 3367 ierr = MatRestoreRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); 3368 } 3369 ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr); 3370 ierr = PetscFree(bindx);CHKERRQ(ierr); 3371 3372 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3373 ierr = MatSetSizes(*outmat,m,n*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3374 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 3375 ierr = MatSetType(*outmat,MATMPISBAIJ);CHKERRQ(ierr); 3376 ierr = MatMPISBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr); 3377 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3378 } 3379 3380 /* numeric phase */ 3381 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 3382 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 3383 3384 ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 3385 for (i=0; i<m; i++) { 3386 ierr = MatGetRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3387 Ii = i + rstart; 3388 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3389 ierr = MatRestoreRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3390 } 3391 ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr); 3392 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3393 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3394 PetscFunctionReturn(0); 3395 } 3396