1 2 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/ 3 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 4 #include <../src/mat/impls/sbaij/seq/sbaij.h> 5 #include <petscblaslapack.h> 6 7 #if defined(PETSC_HAVE_ELEMENTAL) 8 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 9 #endif 10 11 /* This could be moved to matimpl.h */ 12 static PetscErrorCode MatPreallocateWithMats_Private(Mat B, PetscInt nm, Mat X[], PetscBool symm[], PetscBool fill) 13 { 14 Mat preallocator; 15 PetscInt r,rstart,rend; 16 PetscInt bs,i,m,n,M,N; 17 PetscBool cong = PETSC_TRUE; 18 PetscErrorCode ierr; 19 20 PetscFunctionBegin; 21 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 22 PetscValidLogicalCollectiveInt(B,nm,2); 23 for (i = 0; i < nm; i++) { 24 PetscValidHeaderSpecific(X[i],MAT_CLASSID,3); 25 ierr = PetscLayoutCompare(B->rmap,X[i]->rmap,&cong);CHKERRQ(ierr); 26 if (!cong) SETERRQ(PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"Not for different layouts"); 27 } 28 PetscValidLogicalCollectiveBool(B,fill,5); 29 ierr = MatGetBlockSize(B,&bs);CHKERRQ(ierr); 30 ierr = MatGetSize(B,&M,&N);CHKERRQ(ierr); 31 ierr = MatGetLocalSize(B,&m,&n);CHKERRQ(ierr); 32 ierr = MatCreate(PetscObjectComm((PetscObject)B),&preallocator);CHKERRQ(ierr); 33 ierr = MatSetType(preallocator,MATPREALLOCATOR);CHKERRQ(ierr); 34 ierr = MatSetBlockSize(preallocator,bs);CHKERRQ(ierr); 35 ierr = MatSetSizes(preallocator,m,n,M,N);CHKERRQ(ierr); 36 ierr = MatSetUp(preallocator);CHKERRQ(ierr); 37 ierr = MatGetOwnershipRange(preallocator,&rstart,&rend);CHKERRQ(ierr); 38 for (r = rstart; r < rend; ++r) { 39 PetscInt ncols; 40 const PetscInt *row; 41 const PetscScalar *vals; 42 43 for (i = 0; i < nm; i++) { 44 ierr = MatGetRow(X[i],r,&ncols,&row,&vals);CHKERRQ(ierr); 45 ierr = MatSetValues(preallocator,1,&r,ncols,row,vals,INSERT_VALUES);CHKERRQ(ierr); 46 if (symm && symm[i]) { 47 ierr = MatSetValues(preallocator,ncols,row,1,&r,vals,INSERT_VALUES);CHKERRQ(ierr); 48 } 49 ierr = MatRestoreRow(X[i],r,&ncols,&row,&vals);CHKERRQ(ierr); 50 } 51 } 52 ierr = MatAssemblyBegin(preallocator,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 53 ierr = MatAssemblyEnd(preallocator,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 54 ierr = MatPreallocatorPreallocate(preallocator,fill,B);CHKERRQ(ierr); 55 ierr = MatDestroy(&preallocator);CHKERRQ(ierr); 56 PetscFunctionReturn(0); 57 } 58 59 static PetscErrorCode MatConvert_MPISBAIJ_XAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 60 { 61 Mat B; 62 PetscErrorCode ierr; 63 PetscInt r; 64 65 PetscFunctionBegin; 66 if (reuse != MAT_REUSE_MATRIX) { 67 PetscBool symm = PETSC_TRUE; 68 PetscInt bs; 69 70 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 71 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 72 ierr = MatSetType(B,newtype);CHKERRQ(ierr); 73 ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr); 74 ierr = MatSetBlockSize(B,bs);CHKERRQ(ierr); 75 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 76 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 77 ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr); 78 ierr = MatPreallocateWithMats_Private(B,1,&A,&symm,PETSC_TRUE);CHKERRQ(ierr); 79 ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr); 80 } else B = *newmat; 81 82 ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr); 83 for (r = A->rmap->rstart; r < A->rmap->rend; r++) { 84 PetscInt ncols; 85 const PetscInt *row; 86 const PetscScalar *vals; 87 88 ierr = MatGetRow(A,r,&ncols,&row,&vals);CHKERRQ(ierr); 89 ierr = MatSetValues(B,1,&r,ncols,row,vals,INSERT_VALUES);CHKERRQ(ierr); 90 ierr = MatSetValues(B,ncols,row,1,&r,vals,INSERT_VALUES);CHKERRQ(ierr); 91 ierr = MatRestoreRow(A,r,&ncols,&row,&vals);CHKERRQ(ierr); 92 } 93 ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr); 94 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 95 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 96 97 if (reuse == MAT_INPLACE_MATRIX) { 98 ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr); 99 } else { 100 *newmat = B; 101 } 102 PetscFunctionReturn(0); 103 } 104 105 PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat) 106 { 107 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ*)mat->data; 108 PetscErrorCode ierr; 109 110 PetscFunctionBegin; 111 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 112 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 113 PetscFunctionReturn(0); 114 } 115 116 PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat) 117 { 118 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ*)mat->data; 119 PetscErrorCode ierr; 120 121 PetscFunctionBegin; 122 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 123 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 124 PetscFunctionReturn(0); 125 } 126 127 #define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,orow,ocol) \ 128 { \ 129 brow = row/bs; \ 130 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 131 rmax = aimax[brow]; nrow = ailen[brow]; \ 132 bcol = col/bs; \ 133 ridx = row % bs; cidx = col % bs; \ 134 low = 0; high = nrow; \ 135 while (high-low > 3) { \ 136 t = (low+high)/2; \ 137 if (rp[t] > bcol) high = t; \ 138 else low = t; \ 139 } \ 140 for (_i=low; _i<high; _i++) { \ 141 if (rp[_i] > bcol) break; \ 142 if (rp[_i] == bcol) { \ 143 bap = ap + bs2*_i + bs*cidx + ridx; \ 144 if (addv == ADD_VALUES) *bap += value; \ 145 else *bap = value; \ 146 goto a_noinsert; \ 147 } \ 148 } \ 149 if (a->nonew == 1) goto a_noinsert; \ 150 if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 151 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 152 N = nrow++ - 1; \ 153 /* shift up all the later entries in this row */ \ 154 ierr = PetscArraymove(rp+_i+1,rp+_i,N-_i+1);CHKERRQ(ierr); \ 155 ierr = PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1));CHKERRQ(ierr); \ 156 ierr = PetscArrayzero(ap+bs2*_i,bs2);CHKERRQ(ierr); \ 157 rp[_i] = bcol; \ 158 ap[bs2*_i + bs*cidx + ridx] = value; \ 159 A->nonzerostate++;\ 160 a_noinsert:; \ 161 ailen[brow] = nrow; \ 162 } 163 164 #define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,orow,ocol) \ 165 { \ 166 brow = row/bs; \ 167 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 168 rmax = bimax[brow]; nrow = bilen[brow]; \ 169 bcol = col/bs; \ 170 ridx = row % bs; cidx = col % bs; \ 171 low = 0; high = nrow; \ 172 while (high-low > 3) { \ 173 t = (low+high)/2; \ 174 if (rp[t] > bcol) high = t; \ 175 else low = t; \ 176 } \ 177 for (_i=low; _i<high; _i++) { \ 178 if (rp[_i] > bcol) break; \ 179 if (rp[_i] == bcol) { \ 180 bap = ap + bs2*_i + bs*cidx + ridx; \ 181 if (addv == ADD_VALUES) *bap += value; \ 182 else *bap = value; \ 183 goto b_noinsert; \ 184 } \ 185 } \ 186 if (b->nonew == 1) goto b_noinsert; \ 187 if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 188 MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 189 N = nrow++ - 1; \ 190 /* shift up all the later entries in this row */ \ 191 ierr = PetscArraymove(rp+_i+1,rp+_i,N-_i+1);CHKERRQ(ierr); \ 192 ierr = PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1));CHKERRQ(ierr); \ 193 ierr = PetscArrayzero(ap+bs2*_i,bs2);CHKERRQ(ierr); \ 194 rp[_i] = bcol; \ 195 ap[bs2*_i + bs*cidx + ridx] = value; \ 196 B->nonzerostate++;\ 197 b_noinsert:; \ 198 bilen[brow] = nrow; \ 199 } 200 201 /* Only add/insert a(i,j) with i<=j (blocks). 202 Any a(i,j) with i>j input by user is ingored or generates an error 203 */ 204 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 205 { 206 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 207 MatScalar value; 208 PetscBool roworiented = baij->roworiented; 209 PetscErrorCode ierr; 210 PetscInt i,j,row,col; 211 PetscInt rstart_orig=mat->rmap->rstart; 212 PetscInt rend_orig =mat->rmap->rend,cstart_orig=mat->cmap->rstart; 213 PetscInt cend_orig =mat->cmap->rend,bs=mat->rmap->bs; 214 215 /* Some Variables required in the macro */ 216 Mat A = baij->A; 217 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data; 218 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 219 MatScalar *aa =a->a; 220 221 Mat B = baij->B; 222 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 223 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 224 MatScalar *ba =b->a; 225 226 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 227 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 228 MatScalar *ap,*bap; 229 230 /* for stash */ 231 PetscInt n_loc, *in_loc = NULL; 232 MatScalar *v_loc = NULL; 233 234 PetscFunctionBegin; 235 if (!baij->donotstash) { 236 if (n > baij->n_loc) { 237 ierr = PetscFree(baij->in_loc);CHKERRQ(ierr); 238 ierr = PetscFree(baij->v_loc);CHKERRQ(ierr); 239 ierr = PetscMalloc1(n,&baij->in_loc);CHKERRQ(ierr); 240 ierr = PetscMalloc1(n,&baij->v_loc);CHKERRQ(ierr); 241 242 baij->n_loc = n; 243 } 244 in_loc = baij->in_loc; 245 v_loc = baij->v_loc; 246 } 247 248 for (i=0; i<m; i++) { 249 if (im[i] < 0) continue; 250 #if defined(PETSC_USE_DEBUG) 251 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 252 #endif 253 if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */ 254 row = im[i] - rstart_orig; /* local row index */ 255 for (j=0; j<n; j++) { 256 if (im[i]/bs > in[j]/bs) { 257 if (a->ignore_ltriangular) { 258 continue; /* ignore lower triangular blocks */ 259 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 260 } 261 if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */ 262 col = in[j] - cstart_orig; /* local col index */ 263 brow = row/bs; bcol = col/bs; 264 if (brow > bcol) continue; /* ignore lower triangular blocks of A */ 265 if (roworiented) value = v[i*n+j]; 266 else value = v[i+j*m]; 267 MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,im[i],in[j]); 268 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 269 } else if (in[j] < 0) continue; 270 #if defined(PETSC_USE_DEBUG) 271 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 272 #endif 273 else { /* off-diag entry (B) */ 274 if (mat->was_assembled) { 275 if (!baij->colmap) { 276 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 277 } 278 #if defined(PETSC_USE_CTABLE) 279 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 280 col = col - 1; 281 #else 282 col = baij->colmap[in[j]/bs] - 1; 283 #endif 284 if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) { 285 ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 286 col = in[j]; 287 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 288 B = baij->B; 289 b = (Mat_SeqBAIJ*)(B)->data; 290 bimax= b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 291 ba = b->a; 292 } else col += in[j]%bs; 293 } else col = in[j]; 294 if (roworiented) value = v[i*n+j]; 295 else value = v[i+j*m]; 296 MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,im[i],in[j]); 297 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 298 } 299 } 300 } else { /* off processor entry */ 301 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 302 if (!baij->donotstash) { 303 mat->assembled = PETSC_FALSE; 304 n_loc = 0; 305 for (j=0; j<n; j++) { 306 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 307 in_loc[n_loc] = in[j]; 308 if (roworiented) { 309 v_loc[n_loc] = v[i*n+j]; 310 } else { 311 v_loc[n_loc] = v[j*m+i]; 312 } 313 n_loc++; 314 } 315 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr); 316 } 317 } 318 } 319 PetscFunctionReturn(0); 320 } 321 322 PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol) 323 { 324 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 325 PetscErrorCode ierr; 326 PetscInt *rp,low,high,t,ii,jj,nrow,i,rmax,N; 327 PetscInt *imax =a->imax,*ai=a->i,*ailen=a->ilen; 328 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs; 329 PetscBool roworiented=a->roworiented; 330 const PetscScalar *value = v; 331 MatScalar *ap,*aa = a->a,*bap; 332 333 PetscFunctionBegin; 334 if (col < row) { 335 if (a->ignore_ltriangular) PetscFunctionReturn(0); /* ignore lower triangular block */ 336 else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 337 } 338 rp = aj + ai[row]; 339 ap = aa + bs2*ai[row]; 340 rmax = imax[row]; 341 nrow = ailen[row]; 342 value = v; 343 low = 0; 344 high = nrow; 345 346 while (high-low > 7) { 347 t = (low+high)/2; 348 if (rp[t] > col) high = t; 349 else low = t; 350 } 351 for (i=low; i<high; i++) { 352 if (rp[i] > col) break; 353 if (rp[i] == col) { 354 bap = ap + bs2*i; 355 if (roworiented) { 356 if (is == ADD_VALUES) { 357 for (ii=0; ii<bs; ii++) { 358 for (jj=ii; jj<bs2; jj+=bs) { 359 bap[jj] += *value++; 360 } 361 } 362 } else { 363 for (ii=0; ii<bs; ii++) { 364 for (jj=ii; jj<bs2; jj+=bs) { 365 bap[jj] = *value++; 366 } 367 } 368 } 369 } else { 370 if (is == ADD_VALUES) { 371 for (ii=0; ii<bs; ii++) { 372 for (jj=0; jj<bs; jj++) { 373 *bap++ += *value++; 374 } 375 } 376 } else { 377 for (ii=0; ii<bs; ii++) { 378 for (jj=0; jj<bs; jj++) { 379 *bap++ = *value++; 380 } 381 } 382 } 383 } 384 goto noinsert2; 385 } 386 } 387 if (nonew == 1) goto noinsert2; 388 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new block index nonzero block (%D, %D) in the matrix", orow, ocol); 389 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 390 N = nrow++ - 1; high++; 391 /* shift up all the later entries in this row */ 392 ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr); 393 ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr); 394 rp[i] = col; 395 bap = ap + bs2*i; 396 if (roworiented) { 397 for (ii=0; ii<bs; ii++) { 398 for (jj=ii; jj<bs2; jj+=bs) { 399 bap[jj] = *value++; 400 } 401 } 402 } else { 403 for (ii=0; ii<bs; ii++) { 404 for (jj=0; jj<bs; jj++) { 405 *bap++ = *value++; 406 } 407 } 408 } 409 noinsert2:; 410 ailen[row] = nrow; 411 PetscFunctionReturn(0); 412 } 413 414 /* 415 This routine is exactly duplicated in mpibaij.c 416 */ 417 PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol) 418 { 419 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 420 PetscInt *rp,low,high,t,ii,jj,nrow,i,rmax,N; 421 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 422 PetscErrorCode ierr; 423 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs; 424 PetscBool roworiented=a->roworiented; 425 const PetscScalar *value = v; 426 MatScalar *ap,*aa = a->a,*bap; 427 428 PetscFunctionBegin; 429 rp = aj + ai[row]; 430 ap = aa + bs2*ai[row]; 431 rmax = imax[row]; 432 nrow = ailen[row]; 433 low = 0; 434 high = nrow; 435 value = v; 436 while (high-low > 7) { 437 t = (low+high)/2; 438 if (rp[t] > col) high = t; 439 else low = t; 440 } 441 for (i=low; i<high; i++) { 442 if (rp[i] > col) break; 443 if (rp[i] == col) { 444 bap = ap + bs2*i; 445 if (roworiented) { 446 if (is == ADD_VALUES) { 447 for (ii=0; ii<bs; ii++) { 448 for (jj=ii; jj<bs2; jj+=bs) { 449 bap[jj] += *value++; 450 } 451 } 452 } else { 453 for (ii=0; ii<bs; ii++) { 454 for (jj=ii; jj<bs2; jj+=bs) { 455 bap[jj] = *value++; 456 } 457 } 458 } 459 } else { 460 if (is == ADD_VALUES) { 461 for (ii=0; ii<bs; ii++,value+=bs) { 462 for (jj=0; jj<bs; jj++) { 463 bap[jj] += value[jj]; 464 } 465 bap += bs; 466 } 467 } else { 468 for (ii=0; ii<bs; ii++,value+=bs) { 469 for (jj=0; jj<bs; jj++) { 470 bap[jj] = value[jj]; 471 } 472 bap += bs; 473 } 474 } 475 } 476 goto noinsert2; 477 } 478 } 479 if (nonew == 1) goto noinsert2; 480 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%D, %D) in the matrix", orow, ocol); 481 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 482 N = nrow++ - 1; high++; 483 /* shift up all the later entries in this row */ 484 ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr); 485 ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr); 486 rp[i] = col; 487 bap = ap + bs2*i; 488 if (roworiented) { 489 for (ii=0; ii<bs; ii++) { 490 for (jj=ii; jj<bs2; jj+=bs) { 491 bap[jj] = *value++; 492 } 493 } 494 } else { 495 for (ii=0; ii<bs; ii++) { 496 for (jj=0; jj<bs; jj++) { 497 *bap++ = *value++; 498 } 499 } 500 } 501 noinsert2:; 502 ailen[row] = nrow; 503 PetscFunctionReturn(0); 504 } 505 506 /* 507 This routine could be optimized by removing the need for the block copy below and passing stride information 508 to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ() 509 */ 510 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 511 { 512 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 513 const MatScalar *value; 514 MatScalar *barray =baij->barray; 515 PetscBool roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular; 516 PetscErrorCode ierr; 517 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 518 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 519 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 520 521 PetscFunctionBegin; 522 if (!barray) { 523 ierr = PetscMalloc1(bs2,&barray);CHKERRQ(ierr); 524 baij->barray = barray; 525 } 526 527 if (roworiented) { 528 stepval = (n-1)*bs; 529 } else { 530 stepval = (m-1)*bs; 531 } 532 for (i=0; i<m; i++) { 533 if (im[i] < 0) continue; 534 #if defined(PETSC_USE_DEBUG) 535 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %D max %D",im[i],baij->Mbs-1); 536 #endif 537 if (im[i] >= rstart && im[i] < rend) { 538 row = im[i] - rstart; 539 for (j=0; j<n; j++) { 540 if (im[i] > in[j]) { 541 if (ignore_ltriangular) continue; /* ignore lower triangular blocks */ 542 else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 543 } 544 /* If NumCol = 1 then a copy is not required */ 545 if ((roworiented) && (n == 1)) { 546 barray = (MatScalar*) v + i*bs2; 547 } else if ((!roworiented) && (m == 1)) { 548 barray = (MatScalar*) v + j*bs2; 549 } else { /* Here a copy is required */ 550 if (roworiented) { 551 value = v + i*(stepval+bs)*bs + j*bs; 552 } else { 553 value = v + j*(stepval+bs)*bs + i*bs; 554 } 555 for (ii=0; ii<bs; ii++,value+=stepval) { 556 for (jj=0; jj<bs; jj++) { 557 *barray++ = *value++; 558 } 559 } 560 barray -=bs2; 561 } 562 563 if (in[j] >= cstart && in[j] < cend) { 564 col = in[j] - cstart; 565 ierr = MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr); 566 } else if (in[j] < 0) continue; 567 #if defined(PETSC_USE_DEBUG) 568 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %D max %D",in[j],baij->Nbs-1); 569 #endif 570 else { 571 if (mat->was_assembled) { 572 if (!baij->colmap) { 573 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 574 } 575 576 #if defined(PETSC_USE_DEBUG) 577 #if defined(PETSC_USE_CTABLE) 578 { PetscInt data; 579 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 580 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 581 } 582 #else 583 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 584 #endif 585 #endif 586 #if defined(PETSC_USE_CTABLE) 587 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 588 col = (col - 1)/bs; 589 #else 590 col = (baij->colmap[in[j]] - 1)/bs; 591 #endif 592 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 593 ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 594 col = in[j]; 595 } 596 } else col = in[j]; 597 ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr); 598 } 599 } 600 } else { 601 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 602 if (!baij->donotstash) { 603 if (roworiented) { 604 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 605 } else { 606 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 607 } 608 } 609 } 610 } 611 PetscFunctionReturn(0); 612 } 613 614 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 615 { 616 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 617 PetscErrorCode ierr; 618 PetscInt bs = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend; 619 PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data; 620 621 PetscFunctionBegin; 622 for (i=0; i<m; i++) { 623 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */ 624 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 625 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 626 row = idxm[i] - bsrstart; 627 for (j=0; j<n; j++) { 628 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */ 629 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 630 if (idxn[j] >= bscstart && idxn[j] < bscend) { 631 col = idxn[j] - bscstart; 632 ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 633 } else { 634 if (!baij->colmap) { 635 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 636 } 637 #if defined(PETSC_USE_CTABLE) 638 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 639 data--; 640 #else 641 data = baij->colmap[idxn[j]/bs]-1; 642 #endif 643 if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 644 else { 645 col = data + idxn[j]%bs; 646 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 647 } 648 } 649 } 650 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 651 } 652 PetscFunctionReturn(0); 653 } 654 655 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm) 656 { 657 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 658 PetscErrorCode ierr; 659 PetscReal sum[2],*lnorm2; 660 661 PetscFunctionBegin; 662 if (baij->size == 1) { 663 ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr); 664 } else { 665 if (type == NORM_FROBENIUS) { 666 ierr = PetscMalloc1(2,&lnorm2);CHKERRQ(ierr); 667 ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr); 668 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++; /* squar power of norm(A) */ 669 ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr); 670 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--; /* squar power of norm(B) */ 671 ierr = MPIU_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 672 *norm = PetscSqrtReal(sum[0] + 2*sum[1]); 673 ierr = PetscFree(lnorm2);CHKERRQ(ierr); 674 } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */ 675 Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data; 676 Mat_SeqBAIJ *bmat=(Mat_SeqBAIJ*)baij->B->data; 677 PetscReal *rsum,*rsum2,vabs; 678 PetscInt *jj,*garray=baij->garray,rstart=baij->rstartbs,nz; 679 PetscInt brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs; 680 MatScalar *v; 681 682 ierr = PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2);CHKERRQ(ierr); 683 ierr = PetscArrayzero(rsum,mat->cmap->N);CHKERRQ(ierr); 684 /* Amat */ 685 v = amat->a; jj = amat->j; 686 for (brow=0; brow<mbs; brow++) { 687 grow = bs*(rstart + brow); 688 nz = amat->i[brow+1] - amat->i[brow]; 689 for (bcol=0; bcol<nz; bcol++) { 690 gcol = bs*(rstart + *jj); jj++; 691 for (col=0; col<bs; col++) { 692 for (row=0; row<bs; row++) { 693 vabs = PetscAbsScalar(*v); v++; 694 rsum[gcol+col] += vabs; 695 /* non-diagonal block */ 696 if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs; 697 } 698 } 699 } 700 ierr = PetscLogFlops(nz*bs*bs);CHKERRQ(ierr); 701 } 702 /* Bmat */ 703 v = bmat->a; jj = bmat->j; 704 for (brow=0; brow<mbs; brow++) { 705 grow = bs*(rstart + brow); 706 nz = bmat->i[brow+1] - bmat->i[brow]; 707 for (bcol=0; bcol<nz; bcol++) { 708 gcol = bs*garray[*jj]; jj++; 709 for (col=0; col<bs; col++) { 710 for (row=0; row<bs; row++) { 711 vabs = PetscAbsScalar(*v); v++; 712 rsum[gcol+col] += vabs; 713 rsum[grow+row] += vabs; 714 } 715 } 716 } 717 ierr = PetscLogFlops(nz*bs*bs);CHKERRQ(ierr); 718 } 719 ierr = MPIU_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 720 *norm = 0.0; 721 for (col=0; col<mat->cmap->N; col++) { 722 if (rsum2[col] > *norm) *norm = rsum2[col]; 723 } 724 ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr); 725 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet"); 726 } 727 PetscFunctionReturn(0); 728 } 729 730 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode) 731 { 732 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 733 PetscErrorCode ierr; 734 PetscInt nstash,reallocs; 735 736 PetscFunctionBegin; 737 if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 738 739 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 740 ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr); 741 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 742 ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 743 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 744 ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 745 PetscFunctionReturn(0); 746 } 747 748 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode) 749 { 750 Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data; 751 Mat_SeqSBAIJ *a =(Mat_SeqSBAIJ*)baij->A->data; 752 PetscErrorCode ierr; 753 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 754 PetscInt *row,*col; 755 PetscBool other_disassembled; 756 PetscMPIInt n; 757 PetscBool r1,r2,r3; 758 MatScalar *val; 759 760 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 761 PetscFunctionBegin; 762 if (!baij->donotstash && !mat->nooffprocentries) { 763 while (1) { 764 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 765 if (!flg) break; 766 767 for (i=0; i<n;) { 768 /* Now identify the consecutive vals belonging to the same row */ 769 for (j=i,rstart=row[j]; j<n; j++) { 770 if (row[j] != rstart) break; 771 } 772 if (j < n) ncols = j-i; 773 else ncols = n-i; 774 /* Now assemble all these values with a single function call */ 775 ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 776 i = j; 777 } 778 } 779 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 780 /* Now process the block-stash. Since the values are stashed column-oriented, 781 set the roworiented flag to column oriented, and after MatSetValues() 782 restore the original flags */ 783 r1 = baij->roworiented; 784 r2 = a->roworiented; 785 r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented; 786 787 baij->roworiented = PETSC_FALSE; 788 a->roworiented = PETSC_FALSE; 789 790 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworinted */ 791 while (1) { 792 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 793 if (!flg) break; 794 795 for (i=0; i<n;) { 796 /* Now identify the consecutive vals belonging to the same row */ 797 for (j=i,rstart=row[j]; j<n; j++) { 798 if (row[j] != rstart) break; 799 } 800 if (j < n) ncols = j-i; 801 else ncols = n-i; 802 ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode);CHKERRQ(ierr); 803 i = j; 804 } 805 } 806 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 807 808 baij->roworiented = r1; 809 a->roworiented = r2; 810 811 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworinted */ 812 } 813 814 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 815 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 816 817 /* determine if any processor has disassembled, if so we must 818 also disassemble ourselfs, in order that we may reassemble. */ 819 /* 820 if nonzero structure of submatrix B cannot change then we know that 821 no processor disassembled thus we can skip this stuff 822 */ 823 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 824 ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 825 if (mat->was_assembled && !other_disassembled) { 826 ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 827 } 828 } 829 830 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 831 ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */ 832 } 833 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 834 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 835 836 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 837 838 baij->rowvalues = 0; 839 840 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 841 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 842 PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate; 843 ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 844 } 845 PetscFunctionReturn(0); 846 } 847 848 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 849 #include <petscdraw.h> 850 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 851 { 852 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 853 PetscErrorCode ierr; 854 PetscInt bs = mat->rmap->bs; 855 PetscMPIInt rank = baij->rank; 856 PetscBool iascii,isdraw; 857 PetscViewer sviewer; 858 PetscViewerFormat format; 859 860 PetscFunctionBegin; 861 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 862 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 863 if (iascii) { 864 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 865 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 866 MatInfo info; 867 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 868 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 869 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 870 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %g\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(double)info.memory);CHKERRQ(ierr); 871 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 872 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 873 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 874 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 875 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 876 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 877 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 878 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 879 PetscFunctionReturn(0); 880 } else if (format == PETSC_VIEWER_ASCII_INFO) { 881 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 882 PetscFunctionReturn(0); 883 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 884 PetscFunctionReturn(0); 885 } 886 } 887 888 if (isdraw) { 889 PetscDraw draw; 890 PetscBool isnull; 891 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 892 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 893 if (isnull) PetscFunctionReturn(0); 894 } 895 896 { 897 /* assemble the entire matrix onto first processor. */ 898 Mat A; 899 Mat_SeqSBAIJ *Aloc; 900 Mat_SeqBAIJ *Bloc; 901 PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 902 MatScalar *a; 903 const char *matname; 904 905 /* Should this be the same type as mat? */ 906 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 907 if (!rank) { 908 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 909 } else { 910 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 911 } 912 ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 913 ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr); 914 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 915 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 916 917 /* copy over the A part */ 918 Aloc = (Mat_SeqSBAIJ*)baij->A->data; 919 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 920 ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr); 921 922 for (i=0; i<mbs; i++) { 923 rvals[0] = bs*(baij->rstartbs + i); 924 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 925 for (j=ai[i]; j<ai[i+1]; j++) { 926 col = (baij->cstartbs+aj[j])*bs; 927 for (k=0; k<bs; k++) { 928 ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 929 col++; 930 a += bs; 931 } 932 } 933 } 934 /* copy over the B part */ 935 Bloc = (Mat_SeqBAIJ*)baij->B->data; 936 ai = Bloc->i; aj = Bloc->j; a = Bloc->a; 937 for (i=0; i<mbs; i++) { 938 939 rvals[0] = bs*(baij->rstartbs + i); 940 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 941 for (j=ai[i]; j<ai[i+1]; j++) { 942 col = baij->garray[aj[j]]*bs; 943 for (k=0; k<bs; k++) { 944 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 945 col++; 946 a += bs; 947 } 948 } 949 } 950 ierr = PetscFree(rvals);CHKERRQ(ierr); 951 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 952 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 953 /* 954 Everyone has to call to draw the matrix since the graphics waits are 955 synchronized across all processors that share the PetscDraw object 956 */ 957 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 958 ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr); 959 if (!rank) { 960 ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,matname);CHKERRQ(ierr); 961 ierr = MatView_SeqSBAIJ(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 962 } 963 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 964 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 965 ierr = MatDestroy(&A);CHKERRQ(ierr); 966 } 967 PetscFunctionReturn(0); 968 } 969 970 static PetscErrorCode MatView_MPISBAIJ_Binary(Mat mat,PetscViewer viewer) 971 { 972 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)mat->data; 973 Mat_SeqSBAIJ *A = (Mat_SeqSBAIJ*)a->A->data; 974 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)a->B->data; 975 PetscErrorCode ierr; 976 PetscInt i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen; 977 PetscInt *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll; 978 int fd; 979 PetscScalar *column_values; 980 FILE *file; 981 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 982 PetscInt message_count,flowcontrolcount; 983 984 PetscFunctionBegin; 985 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 986 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 987 nz = bs2*(A->nz + B->nz); 988 rlen = mat->rmap->n; 989 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 990 if (!rank) { 991 header[0] = MAT_FILE_CLASSID; 992 header[1] = mat->rmap->N; 993 header[2] = mat->cmap->N; 994 995 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 996 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 997 /* get largest number of rows any processor has */ 998 range = mat->rmap->range; 999 for (i=1; i<size; i++) { 1000 rlen = PetscMax(rlen,range[i+1] - range[i]); 1001 } 1002 } else { 1003 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1004 } 1005 1006 ierr = PetscMalloc1(rlen/bs,&crow_lens);CHKERRQ(ierr); 1007 /* compute lengths of each row */ 1008 for (i=0; i<a->mbs; i++) { 1009 crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1010 } 1011 /* store the row lengths to the file */ 1012 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1013 if (!rank) { 1014 MPI_Status status; 1015 ierr = PetscMalloc1(rlen,&row_lens);CHKERRQ(ierr); 1016 rlen = (range[1] - range[0])/bs; 1017 for (i=0; i<rlen; i++) { 1018 for (j=0; j<bs; j++) { 1019 row_lens[i*bs+j] = bs*crow_lens[i]; 1020 } 1021 } 1022 ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1023 for (i=1; i<size; i++) { 1024 rlen = (range[i+1] - range[i])/bs; 1025 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1026 ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1027 for (k=0; k<rlen; k++) { 1028 for (j=0; j<bs; j++) { 1029 row_lens[k*bs+j] = bs*crow_lens[k]; 1030 } 1031 } 1032 ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1033 } 1034 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1035 ierr = PetscFree(row_lens);CHKERRQ(ierr); 1036 } else { 1037 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1038 ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1039 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1040 } 1041 ierr = PetscFree(crow_lens);CHKERRQ(ierr); 1042 1043 /* load up the local column indices. Include for all rows not just one for each block row since process 0 does not have the 1044 information needed to make it for each row from a block row. This does require more communication but still not more than 1045 the communication needed for the nonzero values */ 1046 nzmax = nz; /* space a largest processor needs */ 1047 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1048 ierr = PetscMalloc1(nzmax,&column_indices);CHKERRQ(ierr); 1049 cnt = 0; 1050 for (i=0; i<a->mbs; i++) { 1051 pcnt = cnt; 1052 for (j=B->i[i]; j<B->i[i+1]; j++) { 1053 if ((col = garray[B->j[j]]) > cstart) break; 1054 for (l=0; l<bs; l++) { 1055 column_indices[cnt++] = bs*col+l; 1056 } 1057 } 1058 for (k=A->i[i]; k<A->i[i+1]; k++) { 1059 for (l=0; l<bs; l++) { 1060 column_indices[cnt++] = bs*(A->j[k] + cstart)+l; 1061 } 1062 } 1063 for (; j<B->i[i+1]; j++) { 1064 for (l=0; l<bs; l++) { 1065 column_indices[cnt++] = bs*garray[B->j[j]]+l; 1066 } 1067 } 1068 len = cnt - pcnt; 1069 for (k=1; k<bs; k++) { 1070 ierr = PetscArraycpy(&column_indices[cnt],&column_indices[pcnt],len);CHKERRQ(ierr); 1071 cnt += len; 1072 } 1073 } 1074 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1075 1076 /* store the columns to the file */ 1077 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1078 if (!rank) { 1079 MPI_Status status; 1080 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1081 for (i=1; i<size; i++) { 1082 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1083 ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1084 ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1085 ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1086 } 1087 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1088 } else { 1089 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1090 ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1091 ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1092 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1093 } 1094 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1095 1096 /* load up the numerical values */ 1097 ierr = PetscMalloc1(nzmax,&column_values);CHKERRQ(ierr); 1098 cnt = 0; 1099 for (i=0; i<a->mbs; i++) { 1100 rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]); 1101 for (j=B->i[i]; j<B->i[i+1]; j++) { 1102 if (garray[B->j[j]] > cstart) break; 1103 for (l=0; l<bs; l++) { 1104 for (ll=0; ll<bs; ll++) { 1105 column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll]; 1106 } 1107 } 1108 cnt += bs; 1109 } 1110 for (k=A->i[i]; k<A->i[i+1]; k++) { 1111 for (l=0; l<bs; l++) { 1112 for (ll=0; ll<bs; ll++) { 1113 column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll]; 1114 } 1115 } 1116 cnt += bs; 1117 } 1118 for (; j<B->i[i+1]; j++) { 1119 for (l=0; l<bs; l++) { 1120 for (ll=0; ll<bs; ll++) { 1121 column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll]; 1122 } 1123 } 1124 cnt += bs; 1125 } 1126 cnt += (bs-1)*rlen; 1127 } 1128 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1129 1130 /* store the column values to the file */ 1131 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1132 if (!rank) { 1133 MPI_Status status; 1134 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1135 for (i=1; i<size; i++) { 1136 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1137 ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1138 ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1139 ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1140 } 1141 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1142 } else { 1143 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1144 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1145 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1146 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1147 } 1148 ierr = PetscFree(column_values);CHKERRQ(ierr); 1149 1150 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1151 if (file) { 1152 fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs); 1153 } 1154 PetscFunctionReturn(0); 1155 } 1156 1157 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer) 1158 { 1159 PetscErrorCode ierr; 1160 PetscBool iascii,isdraw,issocket,isbinary; 1161 1162 PetscFunctionBegin; 1163 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1164 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1165 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1166 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1167 if (iascii || isdraw || issocket) { 1168 ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1169 } else if (isbinary) { 1170 ierr = MatView_MPISBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1171 } 1172 PetscFunctionReturn(0); 1173 } 1174 1175 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat) 1176 { 1177 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1178 PetscErrorCode ierr; 1179 1180 PetscFunctionBegin; 1181 #if defined(PETSC_USE_LOG) 1182 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N); 1183 #endif 1184 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1185 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 1186 ierr = MatDestroy(&baij->A);CHKERRQ(ierr); 1187 ierr = MatDestroy(&baij->B);CHKERRQ(ierr); 1188 #if defined(PETSC_USE_CTABLE) 1189 ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr); 1190 #else 1191 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 1192 #endif 1193 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 1194 ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr); 1195 ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr); 1196 ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr); 1197 ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr); 1198 ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr); 1199 ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr); 1200 ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr); 1201 ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr); 1202 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 1203 ierr = PetscFree(baij->barray);CHKERRQ(ierr); 1204 ierr = PetscFree(baij->hd);CHKERRQ(ierr); 1205 ierr = VecDestroy(&baij->diag);CHKERRQ(ierr); 1206 ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr); 1207 ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr); 1208 #if defined(PETSC_USE_REAL_MAT_SINGLE) 1209 ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr); 1210 #endif 1211 ierr = PetscFree(baij->in_loc);CHKERRQ(ierr); 1212 ierr = PetscFree(baij->v_loc);CHKERRQ(ierr); 1213 ierr = PetscFree(baij->rangebs);CHKERRQ(ierr); 1214 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1215 1216 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1217 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1218 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1219 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1220 #if defined(PETSC_HAVE_ELEMENTAL) 1221 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_elemental_C",NULL);CHKERRQ(ierr); 1222 #endif 1223 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpiaij_C",NULL);CHKERRQ(ierr); 1224 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpibaij_C",NULL);CHKERRQ(ierr); 1225 PetscFunctionReturn(0); 1226 } 1227 1228 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy) 1229 { 1230 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1231 PetscErrorCode ierr; 1232 PetscInt nt,mbs=a->mbs,bs=A->rmap->bs; 1233 PetscScalar *from; 1234 const PetscScalar *x; 1235 1236 PetscFunctionBegin; 1237 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1238 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1239 1240 /* diagonal part */ 1241 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 1242 ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr); 1243 1244 /* subdiagonal part */ 1245 ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 1246 1247 /* copy x into the vec slvec0 */ 1248 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 1249 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1250 1251 ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr); 1252 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 1253 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1254 1255 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1256 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1257 /* supperdiagonal part */ 1258 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 1259 PetscFunctionReturn(0); 1260 } 1261 1262 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy) 1263 { 1264 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1265 PetscErrorCode ierr; 1266 PetscInt nt,mbs=a->mbs,bs=A->rmap->bs; 1267 PetscScalar *from; 1268 const PetscScalar *x; 1269 1270 PetscFunctionBegin; 1271 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1272 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1273 1274 /* diagonal part */ 1275 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 1276 ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr); 1277 1278 /* subdiagonal part */ 1279 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 1280 1281 /* copy x into the vec slvec0 */ 1282 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 1283 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1284 1285 ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr); 1286 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 1287 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1288 1289 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1290 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1291 /* supperdiagonal part */ 1292 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 1293 PetscFunctionReturn(0); 1294 } 1295 1296 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy) 1297 { 1298 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1299 PetscErrorCode ierr; 1300 PetscInt nt; 1301 1302 PetscFunctionBegin; 1303 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1304 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1305 1306 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1307 if (nt != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 1308 1309 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1310 /* do diagonal part */ 1311 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1312 /* do supperdiagonal part */ 1313 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1314 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1315 /* do subdiagonal part */ 1316 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1317 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1318 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1319 PetscFunctionReturn(0); 1320 } 1321 1322 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1323 { 1324 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1325 PetscErrorCode ierr; 1326 PetscInt mbs=a->mbs,bs=A->rmap->bs; 1327 PetscScalar *from,zero=0.0; 1328 const PetscScalar *x; 1329 1330 PetscFunctionBegin; 1331 /* 1332 PetscSynchronizedPrintf(PetscObjectComm((PetscObject)A)," MatMultAdd is called ...\n"); 1333 PetscSynchronizedFlush(PetscObjectComm((PetscObject)A),PETSC_STDOUT); 1334 */ 1335 /* diagonal part */ 1336 ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr); 1337 ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr); 1338 1339 /* subdiagonal part */ 1340 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 1341 1342 /* copy x into the vec slvec0 */ 1343 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 1344 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1345 ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr); 1346 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 1347 1348 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1349 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1350 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1351 1352 /* supperdiagonal part */ 1353 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr); 1354 PetscFunctionReturn(0); 1355 } 1356 1357 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz) 1358 { 1359 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1360 PetscErrorCode ierr; 1361 1362 PetscFunctionBegin; 1363 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1364 /* do diagonal part */ 1365 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1366 /* do supperdiagonal part */ 1367 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1368 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1369 1370 /* do subdiagonal part */ 1371 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1372 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1373 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1374 PetscFunctionReturn(0); 1375 } 1376 1377 /* 1378 This only works correctly for square matrices where the subblock A->A is the 1379 diagonal block 1380 */ 1381 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v) 1382 { 1383 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1384 PetscErrorCode ierr; 1385 1386 PetscFunctionBegin; 1387 /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */ 1388 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1389 PetscFunctionReturn(0); 1390 } 1391 1392 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa) 1393 { 1394 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1395 PetscErrorCode ierr; 1396 1397 PetscFunctionBegin; 1398 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1399 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1400 PetscFunctionReturn(0); 1401 } 1402 1403 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1404 { 1405 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 1406 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1407 PetscErrorCode ierr; 1408 PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1409 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend; 1410 PetscInt *cmap,*idx_p,cstart = mat->rstartbs; 1411 1412 PetscFunctionBegin; 1413 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1414 mat->getrowactive = PETSC_TRUE; 1415 1416 if (!mat->rowvalues && (idx || v)) { 1417 /* 1418 allocate enough space to hold information from the longest row. 1419 */ 1420 Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data; 1421 Mat_SeqBAIJ *Ba = (Mat_SeqBAIJ*)mat->B->data; 1422 PetscInt max = 1,mbs = mat->mbs,tmp; 1423 for (i=0; i<mbs; i++) { 1424 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */ 1425 if (max < tmp) max = tmp; 1426 } 1427 ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr); 1428 } 1429 1430 if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows"); 1431 lrow = row - brstart; /* local row index */ 1432 1433 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1434 if (!v) {pvA = 0; pvB = 0;} 1435 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1436 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1437 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1438 nztot = nzA + nzB; 1439 1440 cmap = mat->garray; 1441 if (v || idx) { 1442 if (nztot) { 1443 /* Sort by increasing column numbers, assuming A and B already sorted */ 1444 PetscInt imark = -1; 1445 if (v) { 1446 *v = v_p = mat->rowvalues; 1447 for (i=0; i<nzB; i++) { 1448 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1449 else break; 1450 } 1451 imark = i; 1452 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1453 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1454 } 1455 if (idx) { 1456 *idx = idx_p = mat->rowindices; 1457 if (imark > -1) { 1458 for (i=0; i<imark; i++) { 1459 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1460 } 1461 } else { 1462 for (i=0; i<nzB; i++) { 1463 if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1464 else break; 1465 } 1466 imark = i; 1467 } 1468 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1469 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1470 } 1471 } else { 1472 if (idx) *idx = 0; 1473 if (v) *v = 0; 1474 } 1475 } 1476 *nz = nztot; 1477 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1478 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1479 PetscFunctionReturn(0); 1480 } 1481 1482 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1483 { 1484 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1485 1486 PetscFunctionBegin; 1487 if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1488 baij->getrowactive = PETSC_FALSE; 1489 PetscFunctionReturn(0); 1490 } 1491 1492 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A) 1493 { 1494 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1495 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1496 1497 PetscFunctionBegin; 1498 aA->getrow_utriangular = PETSC_TRUE; 1499 PetscFunctionReturn(0); 1500 } 1501 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A) 1502 { 1503 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1504 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1505 1506 PetscFunctionBegin; 1507 aA->getrow_utriangular = PETSC_FALSE; 1508 PetscFunctionReturn(0); 1509 } 1510 1511 PetscErrorCode MatRealPart_MPISBAIJ(Mat A) 1512 { 1513 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1514 PetscErrorCode ierr; 1515 1516 PetscFunctionBegin; 1517 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1518 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1519 PetscFunctionReturn(0); 1520 } 1521 1522 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A) 1523 { 1524 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1525 PetscErrorCode ierr; 1526 1527 PetscFunctionBegin; 1528 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1529 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1530 PetscFunctionReturn(0); 1531 } 1532 1533 /* Check if isrow is a subset of iscol_local, called by MatCreateSubMatrix_MPISBAIJ() 1534 Input: isrow - distributed(parallel), 1535 iscol_local - locally owned (seq) 1536 */ 1537 PetscErrorCode ISEqual_private(IS isrow,IS iscol_local,PetscBool *flg) 1538 { 1539 PetscErrorCode ierr; 1540 PetscInt sz1,sz2,*a1,*a2,i,j,k,nmatch; 1541 const PetscInt *ptr1,*ptr2; 1542 1543 PetscFunctionBegin; 1544 ierr = ISGetLocalSize(isrow,&sz1);CHKERRQ(ierr); 1545 ierr = ISGetLocalSize(iscol_local,&sz2);CHKERRQ(ierr); 1546 if (sz1 > sz2) { 1547 *flg = PETSC_FALSE; 1548 PetscFunctionReturn(0); 1549 } 1550 1551 ierr = ISGetIndices(isrow,&ptr1);CHKERRQ(ierr); 1552 ierr = ISGetIndices(iscol_local,&ptr2);CHKERRQ(ierr); 1553 1554 ierr = PetscMalloc1(sz1,&a1);CHKERRQ(ierr); 1555 ierr = PetscMalloc1(sz2,&a2);CHKERRQ(ierr); 1556 ierr = PetscArraycpy(a1,ptr1,sz1);CHKERRQ(ierr); 1557 ierr = PetscArraycpy(a2,ptr2,sz2);CHKERRQ(ierr); 1558 ierr = PetscSortInt(sz1,a1);CHKERRQ(ierr); 1559 ierr = PetscSortInt(sz2,a2);CHKERRQ(ierr); 1560 1561 nmatch=0; 1562 k = 0; 1563 for (i=0; i<sz1; i++){ 1564 for (j=k; j<sz2; j++){ 1565 if (a1[i] == a2[j]) { 1566 k = j; nmatch++; 1567 break; 1568 } 1569 } 1570 } 1571 ierr = ISRestoreIndices(isrow,&ptr1);CHKERRQ(ierr); 1572 ierr = ISRestoreIndices(iscol_local,&ptr2);CHKERRQ(ierr); 1573 ierr = PetscFree(a1);CHKERRQ(ierr); 1574 ierr = PetscFree(a2);CHKERRQ(ierr); 1575 if (nmatch < sz1) { 1576 *flg = PETSC_FALSE; 1577 } else { 1578 *flg = PETSC_TRUE; 1579 } 1580 PetscFunctionReturn(0); 1581 } 1582 1583 PetscErrorCode MatCreateSubMatrix_MPISBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 1584 { 1585 PetscErrorCode ierr; 1586 IS iscol_local; 1587 PetscInt csize; 1588 PetscBool isequal; 1589 1590 PetscFunctionBegin; 1591 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1592 if (call == MAT_REUSE_MATRIX) { 1593 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 1594 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 1595 } else { 1596 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 1597 ierr = ISEqual_private(isrow,iscol_local,&isequal);CHKERRQ(ierr); 1598 if (!isequal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isrow"); 1599 } 1600 1601 /* now call MatCreateSubMatrix_MPIBAIJ() */ 1602 ierr = MatCreateSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 1603 if (call == MAT_INITIAL_MATRIX) { 1604 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 1605 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 1606 } 1607 PetscFunctionReturn(0); 1608 } 1609 1610 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A) 1611 { 1612 Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data; 1613 PetscErrorCode ierr; 1614 1615 PetscFunctionBegin; 1616 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1617 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1618 PetscFunctionReturn(0); 1619 } 1620 1621 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1622 { 1623 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data; 1624 Mat A = a->A,B = a->B; 1625 PetscErrorCode ierr; 1626 PetscReal isend[5],irecv[5]; 1627 1628 PetscFunctionBegin; 1629 info->block_size = (PetscReal)matin->rmap->bs; 1630 1631 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1632 1633 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1634 isend[3] = info->memory; isend[4] = info->mallocs; 1635 1636 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1637 1638 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1639 isend[3] += info->memory; isend[4] += info->mallocs; 1640 if (flag == MAT_LOCAL) { 1641 info->nz_used = isend[0]; 1642 info->nz_allocated = isend[1]; 1643 info->nz_unneeded = isend[2]; 1644 info->memory = isend[3]; 1645 info->mallocs = isend[4]; 1646 } else if (flag == MAT_GLOBAL_MAX) { 1647 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1648 1649 info->nz_used = irecv[0]; 1650 info->nz_allocated = irecv[1]; 1651 info->nz_unneeded = irecv[2]; 1652 info->memory = irecv[3]; 1653 info->mallocs = irecv[4]; 1654 } else if (flag == MAT_GLOBAL_SUM) { 1655 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1656 1657 info->nz_used = irecv[0]; 1658 info->nz_allocated = irecv[1]; 1659 info->nz_unneeded = irecv[2]; 1660 info->memory = irecv[3]; 1661 info->mallocs = irecv[4]; 1662 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1663 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1664 info->fill_ratio_needed = 0; 1665 info->factor_mallocs = 0; 1666 PetscFunctionReturn(0); 1667 } 1668 1669 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg) 1670 { 1671 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1672 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1673 PetscErrorCode ierr; 1674 1675 PetscFunctionBegin; 1676 switch (op) { 1677 case MAT_NEW_NONZERO_LOCATIONS: 1678 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1679 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1680 case MAT_KEEP_NONZERO_PATTERN: 1681 case MAT_SUBMAT_SINGLEIS: 1682 case MAT_NEW_NONZERO_LOCATION_ERR: 1683 MatCheckPreallocated(A,1); 1684 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1685 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1686 break; 1687 case MAT_ROW_ORIENTED: 1688 MatCheckPreallocated(A,1); 1689 a->roworiented = flg; 1690 1691 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1692 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1693 break; 1694 case MAT_NEW_DIAGONALS: 1695 case MAT_SORTED_FULL: 1696 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1697 break; 1698 case MAT_IGNORE_OFF_PROC_ENTRIES: 1699 a->donotstash = flg; 1700 break; 1701 case MAT_USE_HASH_TABLE: 1702 a->ht_flag = flg; 1703 break; 1704 case MAT_HERMITIAN: 1705 MatCheckPreallocated(A,1); 1706 if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first"); 1707 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1708 #if defined(PETSC_USE_COMPLEX) 1709 A->ops->mult = MatMult_MPISBAIJ_Hermitian; 1710 #endif 1711 break; 1712 case MAT_SPD: 1713 A->spd_set = PETSC_TRUE; 1714 A->spd = flg; 1715 if (flg) { 1716 A->symmetric = PETSC_TRUE; 1717 A->structurally_symmetric = PETSC_TRUE; 1718 A->symmetric_set = PETSC_TRUE; 1719 A->structurally_symmetric_set = PETSC_TRUE; 1720 } 1721 break; 1722 case MAT_SYMMETRIC: 1723 MatCheckPreallocated(A,1); 1724 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1725 break; 1726 case MAT_STRUCTURALLY_SYMMETRIC: 1727 MatCheckPreallocated(A,1); 1728 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1729 break; 1730 case MAT_SYMMETRY_ETERNAL: 1731 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric"); 1732 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1733 break; 1734 case MAT_IGNORE_LOWER_TRIANGULAR: 1735 aA->ignore_ltriangular = flg; 1736 break; 1737 case MAT_ERROR_LOWER_TRIANGULAR: 1738 aA->ignore_ltriangular = flg; 1739 break; 1740 case MAT_GETROW_UPPERTRIANGULAR: 1741 aA->getrow_utriangular = flg; 1742 break; 1743 default: 1744 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1745 } 1746 PetscFunctionReturn(0); 1747 } 1748 1749 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B) 1750 { 1751 PetscErrorCode ierr; 1752 1753 PetscFunctionBegin; 1754 if (reuse == MAT_INITIAL_MATRIX) { 1755 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 1756 } else if (reuse == MAT_REUSE_MATRIX) { 1757 ierr = MatCopy(A,*B,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1758 } 1759 PetscFunctionReturn(0); 1760 } 1761 1762 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr) 1763 { 1764 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1765 Mat a = baij->A, b=baij->B; 1766 PetscErrorCode ierr; 1767 PetscInt nv,m,n; 1768 PetscBool flg; 1769 1770 PetscFunctionBegin; 1771 if (ll != rr) { 1772 ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr); 1773 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n"); 1774 } 1775 if (!ll) PetscFunctionReturn(0); 1776 1777 ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr); 1778 if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n); 1779 1780 ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr); 1781 if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size"); 1782 1783 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1784 1785 /* left diagonalscale the off-diagonal part */ 1786 ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr); 1787 1788 /* scale the diagonal part */ 1789 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1790 1791 /* right diagonalscale the off-diagonal part */ 1792 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1793 ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr); 1794 PetscFunctionReturn(0); 1795 } 1796 1797 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A) 1798 { 1799 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1800 PetscErrorCode ierr; 1801 1802 PetscFunctionBegin; 1803 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1804 PetscFunctionReturn(0); 1805 } 1806 1807 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*); 1808 1809 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool *flag) 1810 { 1811 Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data; 1812 Mat a,b,c,d; 1813 PetscBool flg; 1814 PetscErrorCode ierr; 1815 1816 PetscFunctionBegin; 1817 a = matA->A; b = matA->B; 1818 c = matB->A; d = matB->B; 1819 1820 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1821 if (flg) { 1822 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1823 } 1824 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1825 PetscFunctionReturn(0); 1826 } 1827 1828 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str) 1829 { 1830 PetscErrorCode ierr; 1831 PetscBool isbaij; 1832 1833 PetscFunctionBegin; 1834 ierr = PetscObjectTypeCompareAny((PetscObject)B,&isbaij,MATSEQSBAIJ,MATMPISBAIJ,"");CHKERRQ(ierr); 1835 if (!isbaij) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"Not for matrix type %s",((PetscObject)B)->type_name); 1836 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1837 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1838 ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr); 1839 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1840 ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr); 1841 } else { 1842 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1843 Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)B->data; 1844 1845 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1846 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1847 } 1848 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 1849 PetscFunctionReturn(0); 1850 } 1851 1852 PetscErrorCode MatSetUp_MPISBAIJ(Mat A) 1853 { 1854 PetscErrorCode ierr; 1855 1856 PetscFunctionBegin; 1857 ierr = MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1858 PetscFunctionReturn(0); 1859 } 1860 1861 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1862 { 1863 PetscErrorCode ierr; 1864 Mat_MPISBAIJ *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data; 1865 PetscBLASInt bnz,one=1; 1866 Mat_SeqSBAIJ *xa,*ya; 1867 Mat_SeqBAIJ *xb,*yb; 1868 1869 PetscFunctionBegin; 1870 if (str == SAME_NONZERO_PATTERN) { 1871 PetscScalar alpha = a; 1872 xa = (Mat_SeqSBAIJ*)xx->A->data; 1873 ya = (Mat_SeqSBAIJ*)yy->A->data; 1874 ierr = PetscBLASIntCast(xa->nz,&bnz);CHKERRQ(ierr); 1875 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one)); 1876 xb = (Mat_SeqBAIJ*)xx->B->data; 1877 yb = (Mat_SeqBAIJ*)yy->B->data; 1878 ierr = PetscBLASIntCast(xb->nz,&bnz);CHKERRQ(ierr); 1879 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one)); 1880 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 1881 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1882 ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 1883 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1884 ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr); 1885 } else { 1886 Mat B; 1887 PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs; 1888 if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size"); 1889 ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr); 1890 ierr = MatGetRowUpperTriangular(Y);CHKERRQ(ierr); 1891 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 1892 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 1893 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 1894 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 1895 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 1896 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 1897 ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr); 1898 ierr = MatAXPYGetPreallocation_SeqSBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 1899 ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 1900 ierr = MatMPISBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 1901 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 1902 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 1903 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 1904 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 1905 ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr); 1906 ierr = MatRestoreRowUpperTriangular(Y);CHKERRQ(ierr); 1907 } 1908 PetscFunctionReturn(0); 1909 } 1910 1911 PetscErrorCode MatCreateSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[]) 1912 { 1913 PetscErrorCode ierr; 1914 PetscInt i; 1915 PetscBool flg; 1916 1917 PetscFunctionBegin; 1918 ierr = MatCreateSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr); /* B[] are sbaij matrices */ 1919 for (i=0; i<n; i++) { 1920 ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr); 1921 if (!flg) { 1922 ierr = MatSeqSBAIJZeroOps_Private(*B[i]);CHKERRQ(ierr); 1923 } 1924 } 1925 PetscFunctionReturn(0); 1926 } 1927 1928 PetscErrorCode MatShift_MPISBAIJ(Mat Y,PetscScalar a) 1929 { 1930 PetscErrorCode ierr; 1931 Mat_MPISBAIJ *maij = (Mat_MPISBAIJ*)Y->data; 1932 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ*)maij->A->data; 1933 1934 PetscFunctionBegin; 1935 if (!Y->preallocated) { 1936 ierr = MatMPISBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);CHKERRQ(ierr); 1937 } else if (!aij->nz) { 1938 PetscInt nonew = aij->nonew; 1939 ierr = MatSeqSBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL);CHKERRQ(ierr); 1940 aij->nonew = nonew; 1941 } 1942 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 1943 PetscFunctionReturn(0); 1944 } 1945 1946 PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A,PetscBool *missing,PetscInt *d) 1947 { 1948 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1949 PetscErrorCode ierr; 1950 1951 PetscFunctionBegin; 1952 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 1953 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 1954 if (d) { 1955 PetscInt rstart; 1956 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 1957 *d += rstart/A->rmap->bs; 1958 1959 } 1960 PetscFunctionReturn(0); 1961 } 1962 1963 PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a) 1964 { 1965 PetscFunctionBegin; 1966 *a = ((Mat_MPISBAIJ*)A->data)->A; 1967 PetscFunctionReturn(0); 1968 } 1969 1970 /* -------------------------------------------------------------------*/ 1971 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ, 1972 MatGetRow_MPISBAIJ, 1973 MatRestoreRow_MPISBAIJ, 1974 MatMult_MPISBAIJ, 1975 /* 4*/ MatMultAdd_MPISBAIJ, 1976 MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */ 1977 MatMultAdd_MPISBAIJ, 1978 0, 1979 0, 1980 0, 1981 /* 10*/ 0, 1982 0, 1983 0, 1984 MatSOR_MPISBAIJ, 1985 MatTranspose_MPISBAIJ, 1986 /* 15*/ MatGetInfo_MPISBAIJ, 1987 MatEqual_MPISBAIJ, 1988 MatGetDiagonal_MPISBAIJ, 1989 MatDiagonalScale_MPISBAIJ, 1990 MatNorm_MPISBAIJ, 1991 /* 20*/ MatAssemblyBegin_MPISBAIJ, 1992 MatAssemblyEnd_MPISBAIJ, 1993 MatSetOption_MPISBAIJ, 1994 MatZeroEntries_MPISBAIJ, 1995 /* 24*/ 0, 1996 0, 1997 0, 1998 0, 1999 0, 2000 /* 29*/ MatSetUp_MPISBAIJ, 2001 0, 2002 0, 2003 MatGetDiagonalBlock_MPISBAIJ, 2004 0, 2005 /* 34*/ MatDuplicate_MPISBAIJ, 2006 0, 2007 0, 2008 0, 2009 0, 2010 /* 39*/ MatAXPY_MPISBAIJ, 2011 MatCreateSubMatrices_MPISBAIJ, 2012 MatIncreaseOverlap_MPISBAIJ, 2013 MatGetValues_MPISBAIJ, 2014 MatCopy_MPISBAIJ, 2015 /* 44*/ 0, 2016 MatScale_MPISBAIJ, 2017 MatShift_MPISBAIJ, 2018 0, 2019 0, 2020 /* 49*/ 0, 2021 0, 2022 0, 2023 0, 2024 0, 2025 /* 54*/ 0, 2026 0, 2027 MatSetUnfactored_MPISBAIJ, 2028 0, 2029 MatSetValuesBlocked_MPISBAIJ, 2030 /* 59*/ MatCreateSubMatrix_MPISBAIJ, 2031 0, 2032 0, 2033 0, 2034 0, 2035 /* 64*/ 0, 2036 0, 2037 0, 2038 0, 2039 0, 2040 /* 69*/ MatGetRowMaxAbs_MPISBAIJ, 2041 0, 2042 0, 2043 0, 2044 0, 2045 /* 74*/ 0, 2046 0, 2047 0, 2048 0, 2049 0, 2050 /* 79*/ 0, 2051 0, 2052 0, 2053 0, 2054 MatLoad_MPISBAIJ, 2055 /* 84*/ 0, 2056 0, 2057 0, 2058 0, 2059 0, 2060 /* 89*/ 0, 2061 0, 2062 0, 2063 0, 2064 0, 2065 /* 94*/ 0, 2066 0, 2067 0, 2068 0, 2069 0, 2070 /* 99*/ 0, 2071 0, 2072 0, 2073 0, 2074 0, 2075 /*104*/ 0, 2076 MatRealPart_MPISBAIJ, 2077 MatImaginaryPart_MPISBAIJ, 2078 MatGetRowUpperTriangular_MPISBAIJ, 2079 MatRestoreRowUpperTriangular_MPISBAIJ, 2080 /*109*/ 0, 2081 0, 2082 0, 2083 0, 2084 MatMissingDiagonal_MPISBAIJ, 2085 /*114*/ 0, 2086 0, 2087 0, 2088 0, 2089 0, 2090 /*119*/ 0, 2091 0, 2092 0, 2093 0, 2094 0, 2095 /*124*/ 0, 2096 0, 2097 0, 2098 0, 2099 0, 2100 /*129*/ 0, 2101 0, 2102 0, 2103 0, 2104 0, 2105 /*134*/ 0, 2106 0, 2107 0, 2108 0, 2109 0, 2110 /*139*/ MatSetBlockSizes_Default, 2111 0, 2112 0, 2113 0, 2114 0, 2115 /*144*/MatCreateMPIMatConcatenateSeqMat_MPISBAIJ 2116 }; 2117 2118 PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz) 2119 { 2120 Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)B->data; 2121 PetscErrorCode ierr; 2122 PetscInt i,mbs,Mbs; 2123 PetscMPIInt size; 2124 2125 PetscFunctionBegin; 2126 ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 2127 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2128 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2129 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2130 if (B->rmap->N > B->cmap->N) SETERRQ2(PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"MPISBAIJ matrix cannot have more rows %D than columns %D",B->rmap->N,B->cmap->N); 2131 if (B->rmap->n > B->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"MPISBAIJ matrix cannot have more local rows %D than columns %D",B->rmap->n,B->cmap->n); 2132 2133 mbs = B->rmap->n/bs; 2134 Mbs = B->rmap->N/bs; 2135 if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->N,bs); 2136 2137 B->rmap->bs = bs; 2138 b->bs2 = bs*bs; 2139 b->mbs = mbs; 2140 b->Mbs = Mbs; 2141 b->nbs = B->cmap->n/bs; 2142 b->Nbs = B->cmap->N/bs; 2143 2144 for (i=0; i<=b->size; i++) { 2145 b->rangebs[i] = B->rmap->range[i]/bs; 2146 } 2147 b->rstartbs = B->rmap->rstart/bs; 2148 b->rendbs = B->rmap->rend/bs; 2149 2150 b->cstartbs = B->cmap->rstart/bs; 2151 b->cendbs = B->cmap->rend/bs; 2152 2153 #if defined(PETSC_USE_CTABLE) 2154 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2155 #else 2156 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2157 #endif 2158 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2159 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2160 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2161 ierr = VecDestroy(&b->slvec0);CHKERRQ(ierr); 2162 ierr = VecDestroy(&b->slvec0b);CHKERRQ(ierr); 2163 ierr = VecDestroy(&b->slvec1);CHKERRQ(ierr); 2164 ierr = VecDestroy(&b->slvec1a);CHKERRQ(ierr); 2165 ierr = VecDestroy(&b->slvec1b);CHKERRQ(ierr); 2166 ierr = VecScatterDestroy(&b->sMvctx);CHKERRQ(ierr); 2167 2168 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2169 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 2170 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2171 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2172 ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr); 2173 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 2174 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2175 2176 if (!B->preallocated) { 2177 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2178 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2179 ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr); 2180 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2181 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr); 2182 } 2183 2184 ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2185 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 2186 2187 B->preallocated = PETSC_TRUE; 2188 B->was_assembled = PETSC_FALSE; 2189 B->assembled = PETSC_FALSE; 2190 PetscFunctionReturn(0); 2191 } 2192 2193 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2194 { 2195 PetscInt m,rstart,cend; 2196 PetscInt i,j,d,nz,bd, nz_max=0,*d_nnz=0,*o_nnz=0; 2197 const PetscInt *JJ =0; 2198 PetscScalar *values=0; 2199 PetscErrorCode ierr; 2200 2201 PetscFunctionBegin; 2202 if (bs < 1) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 2203 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2204 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2205 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2206 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2207 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2208 m = B->rmap->n/bs; 2209 rstart = B->rmap->rstart/bs; 2210 cend = B->cmap->rend/bs; 2211 2212 if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 2213 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 2214 for (i=0; i<m; i++) { 2215 nz = ii[i+1] - ii[i]; 2216 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 2217 /* count the ones on the diagonal and above, split into diagonal and off diagonal portions. */ 2218 JJ = jj + ii[i]; 2219 bd = 0; 2220 for (j=0; j<nz; j++) { 2221 if (*JJ >= i + rstart) break; 2222 JJ++; 2223 bd++; 2224 } 2225 d = 0; 2226 for (; j<nz; j++) { 2227 if (*JJ++ >= cend) break; 2228 d++; 2229 } 2230 d_nnz[i] = d; 2231 o_nnz[i] = nz - d - bd; 2232 nz = nz - bd; 2233 nz_max = PetscMax(nz_max,nz); 2234 } 2235 ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2236 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 2237 2238 values = (PetscScalar*)V; 2239 if (!values) { 2240 ierr = PetscCalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr); 2241 } 2242 for (i=0; i<m; i++) { 2243 PetscInt row = i + rstart; 2244 PetscInt ncols = ii[i+1] - ii[i]; 2245 const PetscInt *icols = jj + ii[i]; 2246 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2247 if (bs == 1) { 2248 ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2249 } else { 2250 for (j=0; j<ncols; j++) { 2251 const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0); 2252 ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr); 2253 } 2254 } 2255 } 2256 2257 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2258 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2259 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2260 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2261 PetscFunctionReturn(0); 2262 } 2263 2264 /*MC 2265 MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices, 2266 based on block compressed sparse row format. Only the upper triangular portion of the "diagonal" portion of 2267 the matrix is stored. 2268 2269 For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you 2270 can call MatSetOption(Mat, MAT_HERMITIAN); 2271 2272 Options Database Keys: 2273 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions() 2274 2275 Notes: 2276 The number of rows in the matrix must be less than or equal to the number of columns. Similarly the number of rows in the 2277 diagonal portion of the matrix of each process has to less than or equal the number of columns. 2278 2279 Level: beginner 2280 2281 .seealso: MatCreateMPISBAIJ(), MATSEQSBAIJ, MatType 2282 M*/ 2283 2284 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B) 2285 { 2286 Mat_MPISBAIJ *b; 2287 PetscErrorCode ierr; 2288 PetscBool flg = PETSC_FALSE; 2289 2290 PetscFunctionBegin; 2291 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 2292 B->data = (void*)b; 2293 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2294 2295 B->ops->destroy = MatDestroy_MPISBAIJ; 2296 B->ops->view = MatView_MPISBAIJ; 2297 B->assembled = PETSC_FALSE; 2298 B->insertmode = NOT_SET_VALUES; 2299 2300 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 2301 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr); 2302 2303 /* build local table of row and column ownerships */ 2304 ierr = PetscMalloc1(b->size+2,&b->rangebs);CHKERRQ(ierr); 2305 2306 /* build cache for off array entries formed */ 2307 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 2308 2309 b->donotstash = PETSC_FALSE; 2310 b->colmap = NULL; 2311 b->garray = NULL; 2312 b->roworiented = PETSC_TRUE; 2313 2314 /* stuff used in block assembly */ 2315 b->barray = 0; 2316 2317 /* stuff used for matrix vector multiply */ 2318 b->lvec = 0; 2319 b->Mvctx = 0; 2320 b->slvec0 = 0; 2321 b->slvec0b = 0; 2322 b->slvec1 = 0; 2323 b->slvec1a = 0; 2324 b->slvec1b = 0; 2325 b->sMvctx = 0; 2326 2327 /* stuff for MatGetRow() */ 2328 b->rowindices = 0; 2329 b->rowvalues = 0; 2330 b->getrowactive = PETSC_FALSE; 2331 2332 /* hash table stuff */ 2333 b->ht = 0; 2334 b->hd = 0; 2335 b->ht_size = 0; 2336 b->ht_flag = PETSC_FALSE; 2337 b->ht_fact = 0; 2338 b->ht_total_ct = 0; 2339 b->ht_insert_ct = 0; 2340 2341 /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */ 2342 b->ijonly = PETSC_FALSE; 2343 2344 b->in_loc = 0; 2345 b->v_loc = 0; 2346 b->n_loc = 0; 2347 2348 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ);CHKERRQ(ierr); 2349 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr); 2350 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr); 2351 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr); 2352 #if defined(PETSC_HAVE_ELEMENTAL) 2353 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_elemental_C",MatConvert_MPISBAIJ_Elemental);CHKERRQ(ierr); 2354 #endif 2355 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpiaij_C",MatConvert_MPISBAIJ_XAIJ);CHKERRQ(ierr); 2356 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpibaij_C",MatConvert_MPISBAIJ_XAIJ);CHKERRQ(ierr); 2357 2358 B->symmetric = PETSC_TRUE; 2359 B->structurally_symmetric = PETSC_TRUE; 2360 B->symmetric_set = PETSC_TRUE; 2361 B->structurally_symmetric_set = PETSC_TRUE; 2362 B->symmetric_eternal = PETSC_TRUE; 2363 2364 B->hermitian = PETSC_FALSE; 2365 B->hermitian_set = PETSC_FALSE; 2366 2367 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr); 2368 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr); 2369 ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",flg,&flg,NULL);CHKERRQ(ierr); 2370 if (flg) { 2371 PetscReal fact = 1.39; 2372 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 2373 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr); 2374 if (fact <= 1.0) fact = 1.39; 2375 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 2376 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 2377 } 2378 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2379 PetscFunctionReturn(0); 2380 } 2381 2382 /*MC 2383 MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices. 2384 2385 This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator, 2386 and MATMPISBAIJ otherwise. 2387 2388 Options Database Keys: 2389 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions() 2390 2391 Level: beginner 2392 2393 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ 2394 M*/ 2395 2396 /*@C 2397 MatMPISBAIJSetPreallocation - For good matrix assembly performance 2398 the user should preallocate the matrix storage by setting the parameters 2399 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2400 performance can be increased by more than a factor of 50. 2401 2402 Collective on Mat 2403 2404 Input Parameters: 2405 + B - the matrix 2406 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 2407 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 2408 . d_nz - number of block nonzeros per block row in diagonal portion of local 2409 submatrix (same for all local rows) 2410 . d_nnz - array containing the number of block nonzeros in the various block rows 2411 in the upper triangular and diagonal part of the in diagonal portion of the local 2412 (possibly different for each block row) or NULL. If you plan to factor the matrix you must leave room 2413 for the diagonal entry and set a value even if it is zero. 2414 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2415 submatrix (same for all local rows). 2416 - o_nnz - array containing the number of nonzeros in the various block rows of the 2417 off-diagonal portion of the local submatrix that is right of the diagonal 2418 (possibly different for each block row) or NULL. 2419 2420 2421 Options Database Keys: 2422 + -mat_no_unroll - uses code that does not unroll the loops in the 2423 block calculations (much slower) 2424 - -mat_block_size - size of the blocks to use 2425 2426 Notes: 2427 2428 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2429 than it must be used on all processors that share the object for that argument. 2430 2431 If the *_nnz parameter is given then the *_nz parameter is ignored 2432 2433 Storage Information: 2434 For a square global matrix we define each processor's diagonal portion 2435 to be its local rows and the corresponding columns (a square submatrix); 2436 each processor's off-diagonal portion encompasses the remainder of the 2437 local matrix (a rectangular submatrix). 2438 2439 The user can specify preallocated storage for the diagonal part of 2440 the local submatrix with either d_nz or d_nnz (not both). Set 2441 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 2442 memory allocation. Likewise, specify preallocated storage for the 2443 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2444 2445 You can call MatGetInfo() to get information on how effective the preallocation was; 2446 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 2447 You can also run with the option -info and look for messages with the string 2448 malloc in them to see if additional memory allocation was needed. 2449 2450 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2451 the figure below we depict these three local rows and all columns (0-11). 2452 2453 .vb 2454 0 1 2 3 4 5 6 7 8 9 10 11 2455 -------------------------- 2456 row 3 |. . . d d d o o o o o o 2457 row 4 |. . . d d d o o o o o o 2458 row 5 |. . . d d d o o o o o o 2459 -------------------------- 2460 .ve 2461 2462 Thus, any entries in the d locations are stored in the d (diagonal) 2463 submatrix, and any entries in the o locations are stored in the 2464 o (off-diagonal) submatrix. Note that the d matrix is stored in 2465 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 2466 2467 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 2468 plus the diagonal part of the d matrix, 2469 and o_nz should indicate the number of block nonzeros per row in the o matrix 2470 2471 In general, for PDE problems in which most nonzeros are near the diagonal, 2472 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2473 or you will get TERRIBLE performance; see the users' manual chapter on 2474 matrices. 2475 2476 Level: intermediate 2477 2478 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership() 2479 @*/ 2480 PetscErrorCode MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2481 { 2482 PetscErrorCode ierr; 2483 2484 PetscFunctionBegin; 2485 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2486 PetscValidType(B,1); 2487 PetscValidLogicalCollectiveInt(B,bs,2); 2488 ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 2489 PetscFunctionReturn(0); 2490 } 2491 2492 /*@C 2493 MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format 2494 (block compressed row). For good matrix assembly performance 2495 the user should preallocate the matrix storage by setting the parameters 2496 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2497 performance can be increased by more than a factor of 50. 2498 2499 Collective 2500 2501 Input Parameters: 2502 + comm - MPI communicator 2503 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 2504 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 2505 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2506 This value should be the same as the local size used in creating the 2507 y vector for the matrix-vector product y = Ax. 2508 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 2509 This value should be the same as the local size used in creating the 2510 x vector for the matrix-vector product y = Ax. 2511 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2512 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2513 . d_nz - number of block nonzeros per block row in diagonal portion of local 2514 submatrix (same for all local rows) 2515 . d_nnz - array containing the number of block nonzeros in the various block rows 2516 in the upper triangular portion of the in diagonal portion of the local 2517 (possibly different for each block block row) or NULL. 2518 If you plan to factor the matrix you must leave room for the diagonal entry and 2519 set its value even if it is zero. 2520 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2521 submatrix (same for all local rows). 2522 - o_nnz - array containing the number of nonzeros in the various block rows of the 2523 off-diagonal portion of the local submatrix (possibly different for 2524 each block row) or NULL. 2525 2526 Output Parameter: 2527 . A - the matrix 2528 2529 Options Database Keys: 2530 + -mat_no_unroll - uses code that does not unroll the loops in the 2531 block calculations (much slower) 2532 . -mat_block_size - size of the blocks to use 2533 - -mat_mpi - use the parallel matrix data structures even on one processor 2534 (defaults to using SeqBAIJ format on one processor) 2535 2536 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 2537 MatXXXXSetPreallocation() paradigm instead of this routine directly. 2538 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 2539 2540 Notes: 2541 The number of rows and columns must be divisible by blocksize. 2542 This matrix type does not support complex Hermitian operation. 2543 2544 The user MUST specify either the local or global matrix dimensions 2545 (possibly both). 2546 2547 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2548 than it must be used on all processors that share the object for that argument. 2549 2550 If the *_nnz parameter is given then the *_nz parameter is ignored 2551 2552 Storage Information: 2553 For a square global matrix we define each processor's diagonal portion 2554 to be its local rows and the corresponding columns (a square submatrix); 2555 each processor's off-diagonal portion encompasses the remainder of the 2556 local matrix (a rectangular submatrix). 2557 2558 The user can specify preallocated storage for the diagonal part of 2559 the local submatrix with either d_nz or d_nnz (not both). Set 2560 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 2561 memory allocation. Likewise, specify preallocated storage for the 2562 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2563 2564 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2565 the figure below we depict these three local rows and all columns (0-11). 2566 2567 .vb 2568 0 1 2 3 4 5 6 7 8 9 10 11 2569 -------------------------- 2570 row 3 |. . . d d d o o o o o o 2571 row 4 |. . . d d d o o o o o o 2572 row 5 |. . . d d d o o o o o o 2573 -------------------------- 2574 .ve 2575 2576 Thus, any entries in the d locations are stored in the d (diagonal) 2577 submatrix, and any entries in the o locations are stored in the 2578 o (off-diagonal) submatrix. Note that the d matrix is stored in 2579 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 2580 2581 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 2582 plus the diagonal part of the d matrix, 2583 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2584 In general, for PDE problems in which most nonzeros are near the diagonal, 2585 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2586 or you will get TERRIBLE performance; see the users' manual chapter on 2587 matrices. 2588 2589 Level: intermediate 2590 2591 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ() 2592 @*/ 2593 2594 PetscErrorCode MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2595 { 2596 PetscErrorCode ierr; 2597 PetscMPIInt size; 2598 2599 PetscFunctionBegin; 2600 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2601 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2602 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2603 if (size > 1) { 2604 ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr); 2605 ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2606 } else { 2607 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 2608 ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2609 } 2610 PetscFunctionReturn(0); 2611 } 2612 2613 2614 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2615 { 2616 Mat mat; 2617 Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data; 2618 PetscErrorCode ierr; 2619 PetscInt len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs; 2620 PetscScalar *array; 2621 2622 PetscFunctionBegin; 2623 *newmat = 0; 2624 2625 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2626 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2627 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2628 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2629 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2630 2631 mat->factortype = matin->factortype; 2632 mat->preallocated = PETSC_TRUE; 2633 mat->assembled = PETSC_TRUE; 2634 mat->insertmode = NOT_SET_VALUES; 2635 2636 a = (Mat_MPISBAIJ*)mat->data; 2637 a->bs2 = oldmat->bs2; 2638 a->mbs = oldmat->mbs; 2639 a->nbs = oldmat->nbs; 2640 a->Mbs = oldmat->Mbs; 2641 a->Nbs = oldmat->Nbs; 2642 2643 a->size = oldmat->size; 2644 a->rank = oldmat->rank; 2645 a->donotstash = oldmat->donotstash; 2646 a->roworiented = oldmat->roworiented; 2647 a->rowindices = 0; 2648 a->rowvalues = 0; 2649 a->getrowactive = PETSC_FALSE; 2650 a->barray = 0; 2651 a->rstartbs = oldmat->rstartbs; 2652 a->rendbs = oldmat->rendbs; 2653 a->cstartbs = oldmat->cstartbs; 2654 a->cendbs = oldmat->cendbs; 2655 2656 /* hash table stuff */ 2657 a->ht = 0; 2658 a->hd = 0; 2659 a->ht_size = 0; 2660 a->ht_flag = oldmat->ht_flag; 2661 a->ht_fact = oldmat->ht_fact; 2662 a->ht_total_ct = 0; 2663 a->ht_insert_ct = 0; 2664 2665 ierr = PetscArraycpy(a->rangebs,oldmat->rangebs,a->size+2);CHKERRQ(ierr); 2666 if (oldmat->colmap) { 2667 #if defined(PETSC_USE_CTABLE) 2668 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2669 #else 2670 ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr); 2671 ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2672 ierr = PetscArraycpy(a->colmap,oldmat->colmap,a->Nbs);CHKERRQ(ierr); 2673 #endif 2674 } else a->colmap = 0; 2675 2676 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2677 ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr); 2678 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2679 ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); 2680 } else a->garray = 0; 2681 2682 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 2683 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2684 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2685 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2686 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2687 2688 ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr); 2689 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr); 2690 ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr); 2691 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr); 2692 2693 ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr); 2694 ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr); 2695 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr); 2696 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr); 2697 ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr); 2698 ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr); 2699 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr); 2700 ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr); 2701 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr); 2702 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr); 2703 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b);CHKERRQ(ierr); 2704 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a);CHKERRQ(ierr); 2705 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b);CHKERRQ(ierr); 2706 2707 /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */ 2708 ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr); 2709 a->sMvctx = oldmat->sMvctx; 2710 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx);CHKERRQ(ierr); 2711 2712 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2713 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2714 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2715 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2716 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2717 *newmat = mat; 2718 PetscFunctionReturn(0); 2719 } 2720 2721 PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer) 2722 { 2723 PetscErrorCode ierr; 2724 PetscInt i,nz,j,rstart,rend; 2725 PetscScalar *vals,*buf; 2726 MPI_Comm comm; 2727 MPI_Status status; 2728 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,mmbs; 2729 PetscInt header[4],*rowlengths = 0,M,N,m,*cols,*locrowlens; 2730 PetscInt *procsnz = 0,jj,*mycols,*ibuf; 2731 PetscInt bs = newmat->rmap->bs,Mbs,mbs,extra_rows; 2732 PetscInt *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2733 PetscInt dcount,kmax,k,nzcount,tmp; 2734 int fd; 2735 PetscBool isbinary; 2736 2737 PetscFunctionBegin; 2738 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 2739 if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)newmat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newmat)->type_name); 2740 2741 /* force binary viewer to load .info file if it has not yet done so */ 2742 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2743 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 2744 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr); 2745 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 2746 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2747 if (bs < 0) bs = 1; 2748 2749 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2750 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2751 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2752 if (!rank) { 2753 ierr = PetscBinaryRead(fd,(char*)header,4,NULL,PETSC_INT);CHKERRQ(ierr); 2754 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2755 if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newmat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ"); 2756 } 2757 2758 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2759 M = header[1]; 2760 N = header[2]; 2761 2762 /* If global sizes are set, check if they are consistent with that given in the file */ 2763 if (newmat->rmap->N >= 0 && newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newmat->rmap->N,M); 2764 if (newmat->cmap->N >= 0 && newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newmat->cmap->N,N); 2765 2766 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 2767 2768 /* 2769 This code adds extra rows to make sure the number of rows is 2770 divisible by the blocksize 2771 */ 2772 Mbs = M/bs; 2773 extra_rows = bs - M + bs*(Mbs); 2774 if (extra_rows == bs) extra_rows = 0; 2775 else Mbs++; 2776 if (extra_rows &&!rank) { 2777 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 2778 } 2779 2780 /* determine ownership of all rows */ 2781 if (newmat->rmap->n < 0) { /* PETSC_DECIDE */ 2782 mbs = Mbs/size + ((Mbs % size) > rank); 2783 m = mbs*bs; 2784 } else { /* User Set */ 2785 m = newmat->rmap->n; 2786 mbs = m/bs; 2787 } 2788 ierr = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr); 2789 ierr = PetscMPIIntCast(mbs,&mmbs);CHKERRQ(ierr); 2790 ierr = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2791 rowners[0] = 0; 2792 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2793 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2794 rstart = rowners[rank]; 2795 rend = rowners[rank+1]; 2796 2797 /* distribute row lengths to all processors */ 2798 ierr = PetscMalloc1((rend-rstart)*bs,&locrowlens);CHKERRQ(ierr); 2799 if (!rank) { 2800 ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr); 2801 ierr = PetscBinaryRead(fd,rowlengths,M,NULL,PETSC_INT);CHKERRQ(ierr); 2802 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2803 ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr); 2804 for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2805 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2806 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2807 } else { 2808 ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2809 } 2810 2811 if (!rank) { /* procs[0] */ 2812 /* calculate the number of nonzeros on each processor */ 2813 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 2814 for (i=0; i<size; i++) { 2815 for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 2816 procsnz[i] += rowlengths[j]; 2817 } 2818 } 2819 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2820 2821 /* determine max buffer needed and allocate it */ 2822 maxnz = 0; 2823 for (i=0; i<size; i++) { 2824 maxnz = PetscMax(maxnz,procsnz[i]); 2825 } 2826 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 2827 2828 /* read in my part of the matrix column indices */ 2829 nz = procsnz[0]; 2830 ierr = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr); 2831 mycols = ibuf; 2832 if (size == 1) nz -= extra_rows; 2833 ierr = PetscBinaryRead(fd,mycols,nz,NULL,PETSC_INT);CHKERRQ(ierr); 2834 if (size == 1) { 2835 for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i; 2836 } 2837 2838 /* read in every ones (except the last) and ship off */ 2839 for (i=1; i<size-1; i++) { 2840 nz = procsnz[i]; 2841 ierr = PetscBinaryRead(fd,cols,nz,NULL,PETSC_INT);CHKERRQ(ierr); 2842 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2843 } 2844 /* read in the stuff for the last proc */ 2845 if (size != 1) { 2846 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2847 ierr = PetscBinaryRead(fd,cols,nz,NULL,PETSC_INT);CHKERRQ(ierr); 2848 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2849 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 2850 } 2851 ierr = PetscFree(cols);CHKERRQ(ierr); 2852 } else { /* procs[i], i>0 */ 2853 /* determine buffer space needed for message */ 2854 nz = 0; 2855 for (i=0; i<m; i++) nz += locrowlens[i]; 2856 ierr = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr); 2857 mycols = ibuf; 2858 /* receive message of column indices*/ 2859 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2860 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2861 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2862 } 2863 2864 /* loop over local rows, determining number of off diagonal entries */ 2865 ierr = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr); 2866 ierr = PetscCalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr); 2867 rowcount = 0; 2868 nzcount = 0; 2869 for (i=0; i<mbs; i++) { 2870 dcount = 0; 2871 odcount = 0; 2872 for (j=0; j<bs; j++) { 2873 kmax = locrowlens[rowcount]; 2874 for (k=0; k<kmax; k++) { 2875 tmp = mycols[nzcount++]/bs; /* block col. index */ 2876 if (!mask[tmp]) { 2877 mask[tmp] = 1; 2878 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */ 2879 else masked1[dcount++] = tmp; /* entry in diag portion */ 2880 } 2881 } 2882 rowcount++; 2883 } 2884 2885 dlens[i] = dcount; /* d_nzz[i] */ 2886 odlens[i] = odcount; /* o_nzz[i] */ 2887 2888 /* zero out the mask elements we set */ 2889 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2890 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2891 } 2892 ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 2893 ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr); 2894 ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 2895 2896 if (!rank) { 2897 ierr = PetscMalloc1(maxnz,&buf);CHKERRQ(ierr); 2898 /* read in my part of the matrix numerical values */ 2899 nz = procsnz[0]; 2900 vals = buf; 2901 mycols = ibuf; 2902 if (size == 1) nz -= extra_rows; 2903 ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr); 2904 if (size == 1) { 2905 for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0; 2906 } 2907 2908 /* insert into matrix */ 2909 jj = rstart*bs; 2910 for (i=0; i<m; i++) { 2911 ierr = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2912 mycols += locrowlens[i]; 2913 vals += locrowlens[i]; 2914 jj++; 2915 } 2916 2917 /* read in other processors (except the last one) and ship out */ 2918 for (i=1; i<size-1; i++) { 2919 nz = procsnz[i]; 2920 vals = buf; 2921 ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr); 2922 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 2923 } 2924 /* the last proc */ 2925 if (size != 1) { 2926 nz = procsnz[i] - extra_rows; 2927 vals = buf; 2928 ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr); 2929 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2930 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 2931 } 2932 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2933 2934 } else { 2935 /* receive numeric values */ 2936 ierr = PetscMalloc1(nz,&buf);CHKERRQ(ierr); 2937 2938 /* receive message of values*/ 2939 vals = buf; 2940 mycols = ibuf; 2941 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr); 2942 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2943 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2944 2945 /* insert into matrix */ 2946 jj = rstart*bs; 2947 for (i=0; i<m; i++) { 2948 ierr = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2949 mycols += locrowlens[i]; 2950 vals += locrowlens[i]; 2951 jj++; 2952 } 2953 } 2954 2955 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2956 ierr = PetscFree(buf);CHKERRQ(ierr); 2957 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2958 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 2959 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 2960 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 2961 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2962 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2963 PetscFunctionReturn(0); 2964 } 2965 2966 /*XXXXX@ 2967 MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2968 2969 Input Parameters: 2970 . mat - the matrix 2971 . fact - factor 2972 2973 Not Collective on Mat, each process can have a different hash factor 2974 2975 Level: advanced 2976 2977 Notes: 2978 This can also be set by the command line option: -mat_use_hash_table fact 2979 2980 .seealso: MatSetOption() 2981 @XXXXX*/ 2982 2983 2984 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[]) 2985 { 2986 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 2987 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(a->B)->data; 2988 PetscReal atmp; 2989 PetscReal *work,*svalues,*rvalues; 2990 PetscErrorCode ierr; 2991 PetscInt i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol; 2992 PetscMPIInt rank,size; 2993 PetscInt *rowners_bs,dest,count,source; 2994 PetscScalar *va; 2995 MatScalar *ba; 2996 MPI_Status stat; 2997 2998 PetscFunctionBegin; 2999 if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov"); 3000 ierr = MatGetRowMaxAbs(a->A,v,NULL);CHKERRQ(ierr); 3001 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 3002 3003 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 3004 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 3005 3006 bs = A->rmap->bs; 3007 mbs = a->mbs; 3008 Mbs = a->Mbs; 3009 ba = b->a; 3010 bi = b->i; 3011 bj = b->j; 3012 3013 /* find ownerships */ 3014 rowners_bs = A->rmap->range; 3015 3016 /* each proc creates an array to be distributed */ 3017 ierr = PetscCalloc1(bs*Mbs,&work);CHKERRQ(ierr); 3018 3019 /* row_max for B */ 3020 if (rank != size-1) { 3021 for (i=0; i<mbs; i++) { 3022 ncols = bi[1] - bi[0]; bi++; 3023 brow = bs*i; 3024 for (j=0; j<ncols; j++) { 3025 bcol = bs*(*bj); 3026 for (kcol=0; kcol<bs; kcol++) { 3027 col = bcol + kcol; /* local col index */ 3028 col += rowners_bs[rank+1]; /* global col index */ 3029 for (krow=0; krow<bs; krow++) { 3030 atmp = PetscAbsScalar(*ba); ba++; 3031 row = brow + krow; /* local row index */ 3032 if (PetscRealPart(va[row]) < atmp) va[row] = atmp; 3033 if (work[col] < atmp) work[col] = atmp; 3034 } 3035 } 3036 bj++; 3037 } 3038 } 3039 3040 /* send values to its owners */ 3041 for (dest=rank+1; dest<size; dest++) { 3042 svalues = work + rowners_bs[dest]; 3043 count = rowners_bs[dest+1]-rowners_bs[dest]; 3044 ierr = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 3045 } 3046 } 3047 3048 /* receive values */ 3049 if (rank) { 3050 rvalues = work; 3051 count = rowners_bs[rank+1]-rowners_bs[rank]; 3052 for (source=0; source<rank; source++) { 3053 ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat);CHKERRQ(ierr); 3054 /* process values */ 3055 for (i=0; i<count; i++) { 3056 if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i]; 3057 } 3058 } 3059 } 3060 3061 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 3062 ierr = PetscFree(work);CHKERRQ(ierr); 3063 PetscFunctionReturn(0); 3064 } 3065 3066 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 3067 { 3068 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 3069 PetscErrorCode ierr; 3070 PetscInt mbs=mat->mbs,bs=matin->rmap->bs; 3071 PetscScalar *x,*ptr,*from; 3072 Vec bb1; 3073 const PetscScalar *b; 3074 3075 PetscFunctionBegin; 3076 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 3077 if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 3078 3079 if (flag == SOR_APPLY_UPPER) { 3080 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 3081 PetscFunctionReturn(0); 3082 } 3083 3084 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 3085 if (flag & SOR_ZERO_INITIAL_GUESS) { 3086 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 3087 its--; 3088 } 3089 3090 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 3091 while (its--) { 3092 3093 /* lower triangular part: slvec0b = - B^T*xx */ 3094 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 3095 3096 /* copy xx into slvec0a */ 3097 ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr); 3098 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 3099 ierr = PetscArraycpy(ptr,x,bs*mbs);CHKERRQ(ierr); 3100 ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr); 3101 3102 ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr); 3103 3104 /* copy bb into slvec1a */ 3105 ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr); 3106 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 3107 ierr = PetscArraycpy(ptr,b,bs*mbs);CHKERRQ(ierr); 3108 ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr); 3109 3110 /* set slvec1b = 0 */ 3111 ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr); 3112 3113 ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3114 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 3115 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 3116 ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3117 3118 /* upper triangular part: bb1 = bb1 - B*x */ 3119 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr); 3120 3121 /* local diagonal sweep */ 3122 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 3123 } 3124 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 3125 } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 3126 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 3127 } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 3128 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 3129 } else if (flag & SOR_EISENSTAT) { 3130 Vec xx1; 3131 PetscBool hasop; 3132 const PetscScalar *diag; 3133 PetscScalar *sl,scale = (omega - 2.0)/omega; 3134 PetscInt i,n; 3135 3136 if (!mat->xx1) { 3137 ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr); 3138 ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr); 3139 } 3140 xx1 = mat->xx1; 3141 bb1 = mat->bb1; 3142 3143 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 3144 3145 if (!mat->diag) { 3146 /* this is wrong for same matrix with new nonzero values */ 3147 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 3148 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 3149 } 3150 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 3151 3152 if (hasop) { 3153 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 3154 ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr); 3155 } else { 3156 /* 3157 These two lines are replaced by code that may be a bit faster for a good compiler 3158 ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr); 3159 ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr); 3160 */ 3161 ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr); 3162 ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr); 3163 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 3164 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 3165 ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr); 3166 if (omega == 1.0) { 3167 for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i]; 3168 ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr); 3169 } else { 3170 for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i]; 3171 ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr); 3172 } 3173 ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr); 3174 ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr); 3175 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 3176 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 3177 } 3178 3179 /* multiply off-diagonal portion of matrix */ 3180 ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr); 3181 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 3182 ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr); 3183 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 3184 ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr); 3185 ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr); 3186 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 3187 ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3188 ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3189 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr); 3190 3191 /* local sweep */ 3192 ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 3193 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 3194 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 3195 PetscFunctionReturn(0); 3196 } 3197 3198 PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 3199 { 3200 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 3201 PetscErrorCode ierr; 3202 Vec lvec1,bb1; 3203 3204 PetscFunctionBegin; 3205 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 3206 if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 3207 3208 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 3209 if (flag & SOR_ZERO_INITIAL_GUESS) { 3210 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 3211 its--; 3212 } 3213 3214 ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr); 3215 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 3216 while (its--) { 3217 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3218 3219 /* lower diagonal part: bb1 = bb - B^T*xx */ 3220 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr); 3221 ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr); 3222 3223 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3224 ierr = VecCopy(bb,bb1);CHKERRQ(ierr); 3225 ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3226 3227 /* upper diagonal part: bb1 = bb1 - B*x */ 3228 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 3229 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 3230 3231 ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3232 3233 /* diagonal sweep */ 3234 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 3235 } 3236 ierr = VecDestroy(&lvec1);CHKERRQ(ierr); 3237 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 3238 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 3239 PetscFunctionReturn(0); 3240 } 3241 3242 /*@ 3243 MatCreateMPISBAIJWithArrays - creates a MPI SBAIJ matrix using arrays that contain in standard 3244 CSR format the local rows. 3245 3246 Collective 3247 3248 Input Parameters: 3249 + comm - MPI communicator 3250 . bs - the block size, only a block size of 1 is supported 3251 . m - number of local rows (Cannot be PETSC_DECIDE) 3252 . n - This value should be the same as the local size used in creating the 3253 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3254 calculated if N is given) For square matrices n is almost always m. 3255 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3256 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3257 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that row block row of the matrix 3258 . j - column indices 3259 - a - matrix values 3260 3261 Output Parameter: 3262 . mat - the matrix 3263 3264 Level: intermediate 3265 3266 Notes: 3267 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3268 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3269 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3270 3271 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3272 3273 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3274 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 3275 @*/ 3276 PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3277 { 3278 PetscErrorCode ierr; 3279 3280 3281 PetscFunctionBegin; 3282 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3283 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3284 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3285 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3286 ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr); 3287 ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr); 3288 PetscFunctionReturn(0); 3289 } 3290 3291 3292 /*@C 3293 MatMPISBAIJSetPreallocationCSR - Creates a sparse parallel matrix in SBAIJ format using the given nonzero structure and (optional) numerical values 3294 3295 Collective 3296 3297 Input Parameters: 3298 + B - the matrix 3299 . bs - the block size 3300 . i - the indices into j for the start of each local row (starts with zero) 3301 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3302 - v - optional values in the matrix 3303 3304 Level: advanced 3305 3306 Notes: 3307 Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries 3308 and usually the numerical values as well 3309 3310 Any entries below the diagonal are ignored 3311 3312 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ 3313 @*/ 3314 PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3315 { 3316 PetscErrorCode ierr; 3317 3318 PetscFunctionBegin; 3319 ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 3320 PetscFunctionReturn(0); 3321 } 3322 3323 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3324 { 3325 PetscErrorCode ierr; 3326 PetscInt m,N,i,rstart,nnz,Ii,bs,cbs; 3327 PetscInt *indx; 3328 PetscScalar *values; 3329 3330 PetscFunctionBegin; 3331 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 3332 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 3333 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inmat->data; 3334 PetscInt *dnz,*onz,mbs,Nbs,nbs; 3335 PetscInt *bindx,rmax=a->rmax,j; 3336 PetscMPIInt rank,size; 3337 3338 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 3339 mbs = m/bs; Nbs = N/cbs; 3340 if (n == PETSC_DECIDE) { 3341 ierr = PetscSplitOwnershipBlock(comm,cbs,&n,&N); 3342 } 3343 nbs = n/cbs; 3344 3345 ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr); 3346 ierr = MatPreallocateInitialize(comm,mbs,nbs,dnz,onz);CHKERRQ(ierr); /* inline function, output __end and __rstart are used below */ 3347 3348 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3349 ierr = MPI_Comm_rank(comm,&size);CHKERRQ(ierr); 3350 if (rank == size-1) { 3351 /* Check sum(nbs) = Nbs */ 3352 if (__end != Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local block columns %D != global block columns %D",__end,Nbs); 3353 } 3354 3355 rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateInitialize */ 3356 ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 3357 for (i=0; i<mbs; i++) { 3358 ierr = MatGetRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */ 3359 nnz = nnz/bs; 3360 for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs; 3361 ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr); 3362 ierr = MatRestoreRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); 3363 } 3364 ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr); 3365 ierr = PetscFree(bindx);CHKERRQ(ierr); 3366 3367 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3368 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3369 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 3370 ierr = MatSetType(*outmat,MATSBAIJ);CHKERRQ(ierr); 3371 ierr = MatSeqSBAIJSetPreallocation(*outmat,bs,0,dnz);CHKERRQ(ierr); 3372 ierr = MatMPISBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr); 3373 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3374 } 3375 3376 /* numeric phase */ 3377 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 3378 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 3379 3380 ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 3381 for (i=0; i<m; i++) { 3382 ierr = MatGetRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3383 Ii = i + rstart; 3384 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3385 ierr = MatRestoreRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3386 } 3387 ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr); 3388 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3389 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3390 PetscFunctionReturn(0); 3391 } 3392