1 2 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/ 3 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 4 #include <../src/mat/impls/sbaij/seq/sbaij.h> 5 #include <petscblaslapack.h> 6 7 #if defined(PETSC_HAVE_ELEMENTAL) 8 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 9 #endif 10 11 /* This could be moved to matimpl.h */ 12 static PetscErrorCode MatPreallocateWithMats_Private(Mat B, PetscInt nm, Mat X[], PetscBool symm[], PetscBool fill) 13 { 14 Mat preallocator; 15 PetscInt r,rstart,rend; 16 PetscInt bs,i,m,n,M,N; 17 PetscBool cong = PETSC_TRUE; 18 PetscErrorCode ierr; 19 20 PetscFunctionBegin; 21 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 22 PetscValidLogicalCollectiveInt(B,nm,2); 23 for (i = 0; i < nm; i++) { 24 PetscValidHeaderSpecific(X[i],MAT_CLASSID,3); 25 ierr = PetscLayoutCompare(B->rmap,X[i]->rmap,&cong);CHKERRQ(ierr); 26 if (!cong) SETERRQ(PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"Not for different layouts"); 27 } 28 PetscValidLogicalCollectiveBool(B,fill,5); 29 ierr = MatGetBlockSize(B,&bs);CHKERRQ(ierr); 30 ierr = MatGetSize(B,&M,&N);CHKERRQ(ierr); 31 ierr = MatGetLocalSize(B,&m,&n);CHKERRQ(ierr); 32 ierr = MatCreate(PetscObjectComm((PetscObject)B),&preallocator);CHKERRQ(ierr); 33 ierr = MatSetType(preallocator,MATPREALLOCATOR);CHKERRQ(ierr); 34 ierr = MatSetBlockSize(preallocator,bs);CHKERRQ(ierr); 35 ierr = MatSetSizes(preallocator,m,n,M,N);CHKERRQ(ierr); 36 ierr = MatSetUp(preallocator);CHKERRQ(ierr); 37 ierr = MatGetOwnershipRange(preallocator,&rstart,&rend);CHKERRQ(ierr); 38 for (r = rstart; r < rend; ++r) { 39 PetscInt ncols; 40 const PetscInt *row; 41 const PetscScalar *vals; 42 43 for (i = 0; i < nm; i++) { 44 ierr = MatGetRow(X[i],r,&ncols,&row,&vals);CHKERRQ(ierr); 45 ierr = MatSetValues(preallocator,1,&r,ncols,row,vals,INSERT_VALUES);CHKERRQ(ierr); 46 if (symm && symm[i]) { 47 ierr = MatSetValues(preallocator,ncols,row,1,&r,vals,INSERT_VALUES);CHKERRQ(ierr); 48 } 49 ierr = MatRestoreRow(X[i],r,&ncols,&row,&vals);CHKERRQ(ierr); 50 } 51 } 52 ierr = MatAssemblyBegin(preallocator,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 53 ierr = MatAssemblyEnd(preallocator,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 54 ierr = MatPreallocatorPreallocate(preallocator,fill,B);CHKERRQ(ierr); 55 ierr = MatDestroy(&preallocator);CHKERRQ(ierr); 56 PetscFunctionReturn(0); 57 } 58 59 static PetscErrorCode MatConvert_MPISBAIJ_XAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 60 { 61 Mat B; 62 PetscErrorCode ierr; 63 PetscInt r; 64 65 PetscFunctionBegin; 66 if (reuse != MAT_REUSE_MATRIX) { 67 PetscBool symm = PETSC_TRUE; 68 PetscInt bs; 69 70 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 71 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 72 ierr = MatSetType(B,newtype);CHKERRQ(ierr); 73 ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr); 74 ierr = MatSetBlockSize(B,bs);CHKERRQ(ierr); 75 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 76 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 77 ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr); 78 ierr = MatPreallocateWithMats_Private(B,1,&A,&symm,PETSC_TRUE);CHKERRQ(ierr); 79 ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr); 80 } else B = *newmat; 81 82 ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr); 83 for (r = A->rmap->rstart; r < A->rmap->rend; r++) { 84 PetscInt ncols; 85 const PetscInt *row; 86 const PetscScalar *vals; 87 88 ierr = MatGetRow(A,r,&ncols,&row,&vals);CHKERRQ(ierr); 89 ierr = 
MatSetValues(B,1,&r,ncols,row,vals,INSERT_VALUES);CHKERRQ(ierr); 90 ierr = MatSetValues(B,ncols,row,1,&r,vals,INSERT_VALUES);CHKERRQ(ierr); 91 ierr = MatRestoreRow(A,r,&ncols,&row,&vals);CHKERRQ(ierr); 92 } 93 ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr); 94 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 95 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 96 97 if (reuse == MAT_INPLACE_MATRIX) { 98 ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr); 99 } else { 100 *newmat = B; 101 } 102 PetscFunctionReturn(0); 103 } 104 105 PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat) 106 { 107 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ*)mat->data; 108 PetscErrorCode ierr; 109 110 PetscFunctionBegin; 111 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 112 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 113 PetscFunctionReturn(0); 114 } 115 116 PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat) 117 { 118 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ*)mat->data; 119 PetscErrorCode ierr; 120 121 PetscFunctionBegin; 122 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 123 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 124 PetscFunctionReturn(0); 125 } 126 127 #define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,orow,ocol) \ 128 { \ 129 brow = row/bs; \ 130 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 131 rmax = aimax[brow]; nrow = ailen[brow]; \ 132 bcol = col/bs; \ 133 ridx = row % bs; cidx = col % bs; \ 134 low = 0; high = nrow; \ 135 while (high-low > 3) { \ 136 t = (low+high)/2; \ 137 if (rp[t] > bcol) high = t; \ 138 else low = t; \ 139 } \ 140 for (_i=low; _i<high; _i++) { \ 141 if (rp[_i] > bcol) break; \ 142 if (rp[_i] == bcol) { \ 143 bap = ap + bs2*_i + bs*cidx + ridx; \ 144 if (addv == ADD_VALUES) *bap += value; \ 145 else *bap = value; \ 146 goto a_noinsert; \ 147 } \ 148 } \ 149 if (a->nonew == 1) goto a_noinsert; \ 150 if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 151 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 152 N = nrow++ - 1; \ 153 /* shift up all the later entries in this row */ \ 154 ierr = PetscArraymove(rp+_i+1,rp+_i,N-_i+1);CHKERRQ(ierr); \ 155 ierr = PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1));CHKERRQ(ierr); \ 156 ierr = PetscArrayzero(ap+bs2*_i,bs2);CHKERRQ(ierr); \ 157 rp[_i] = bcol; \ 158 ap[bs2*_i + bs*cidx + ridx] = value; \ 159 A->nonzerostate++;\ 160 a_noinsert:; \ 161 ailen[brow] = nrow; \ 162 } 163 164 #define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,orow,ocol) \ 165 { \ 166 brow = row/bs; \ 167 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 168 rmax = bimax[brow]; nrow = bilen[brow]; \ 169 bcol = col/bs; \ 170 ridx = row % bs; cidx = col % bs; \ 171 low = 0; high = nrow; \ 172 while (high-low > 3) { \ 173 t = (low+high)/2; \ 174 if (rp[t] > bcol) high = t; \ 175 else low = t; \ 176 } \ 177 for (_i=low; _i<high; _i++) { \ 178 if (rp[_i] > bcol) break; \ 179 if (rp[_i] == bcol) { \ 180 bap = ap + bs2*_i + bs*cidx + ridx; \ 181 if (addv == ADD_VALUES) *bap += value; \ 182 else *bap = value; \ 183 goto b_noinsert; \ 184 } \ 185 } \ 186 if (b->nonew == 1) goto b_noinsert; \ 187 if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 188 MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 189 N = nrow++ - 1; \ 190 /* shift up all the later entries 
in this row */ \ 191 ierr = PetscArraymove(rp+_i+1,rp+_i,N-_i+1);CHKERRQ(ierr); \ 192 ierr = PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1));CHKERRQ(ierr); \ 193 ierr = PetscArrayzero(ap+bs2*_i,bs2);CHKERRQ(ierr); \ 194 rp[_i] = bcol; \ 195 ap[bs2*_i + bs*cidx + ridx] = value; \ 196 B->nonzerostate++;\ 197 b_noinsert:; \ 198 bilen[brow] = nrow; \ 199 } 200 201 /* Only add/insert a(i,j) with i<=j (blocks). 202 Any a(i,j) with i>j input by user is ingored or generates an error 203 */ 204 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 205 { 206 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 207 MatScalar value; 208 PetscBool roworiented = baij->roworiented; 209 PetscErrorCode ierr; 210 PetscInt i,j,row,col; 211 PetscInt rstart_orig=mat->rmap->rstart; 212 PetscInt rend_orig =mat->rmap->rend,cstart_orig=mat->cmap->rstart; 213 PetscInt cend_orig =mat->cmap->rend,bs=mat->rmap->bs; 214 215 /* Some Variables required in the macro */ 216 Mat A = baij->A; 217 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data; 218 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 219 MatScalar *aa =a->a; 220 221 Mat B = baij->B; 222 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 223 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 224 MatScalar *ba =b->a; 225 226 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 227 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 228 MatScalar *ap,*bap; 229 230 /* for stash */ 231 PetscInt n_loc, *in_loc = NULL; 232 MatScalar *v_loc = NULL; 233 234 PetscFunctionBegin; 235 if (!baij->donotstash) { 236 if (n > baij->n_loc) { 237 ierr = PetscFree(baij->in_loc);CHKERRQ(ierr); 238 ierr = PetscFree(baij->v_loc);CHKERRQ(ierr); 239 ierr = PetscMalloc1(n,&baij->in_loc);CHKERRQ(ierr); 240 ierr = PetscMalloc1(n,&baij->v_loc);CHKERRQ(ierr); 241 242 baij->n_loc = n; 243 } 244 in_loc = baij->in_loc; 245 v_loc = baij->v_loc; 246 } 247 248 for (i=0; i<m; i++) { 249 if (im[i] < 0) continue; 250 #if defined(PETSC_USE_DEBUG) 251 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 252 #endif 253 if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */ 254 row = im[i] - rstart_orig; /* local row index */ 255 for (j=0; j<n; j++) { 256 if (im[i]/bs > in[j]/bs) { 257 if (a->ignore_ltriangular) { 258 continue; /* ignore lower triangular blocks */ 259 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. 
Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 260 } 261 if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */ 262 col = in[j] - cstart_orig; /* local col index */ 263 brow = row/bs; bcol = col/bs; 264 if (brow > bcol) continue; /* ignore lower triangular blocks of A */ 265 if (roworiented) value = v[i*n+j]; 266 else value = v[i+j*m]; 267 MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,im[i],in[j]); 268 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 269 } else if (in[j] < 0) continue; 270 #if defined(PETSC_USE_DEBUG) 271 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 272 #endif 273 else { /* off-diag entry (B) */ 274 if (mat->was_assembled) { 275 if (!baij->colmap) { 276 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 277 } 278 #if defined(PETSC_USE_CTABLE) 279 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 280 col = col - 1; 281 #else 282 col = baij->colmap[in[j]/bs] - 1; 283 #endif 284 if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) { 285 ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 286 col = in[j]; 287 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 288 B = baij->B; 289 b = (Mat_SeqBAIJ*)(B)->data; 290 bimax= b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 291 ba = b->a; 292 } else col += in[j]%bs; 293 } else col = in[j]; 294 if (roworiented) value = v[i*n+j]; 295 else value = v[i+j*m]; 296 MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,im[i],in[j]); 297 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 298 } 299 } 300 } else { /* off processor entry */ 301 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 302 if (!baij->donotstash) { 303 mat->assembled = PETSC_FALSE; 304 n_loc = 0; 305 for (j=0; j<n; j++) { 306 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 307 in_loc[n_loc] = in[j]; 308 if (roworiented) { 309 v_loc[n_loc] = v[i*n+j]; 310 } else { 311 v_loc[n_loc] = v[j*m+i]; 312 } 313 n_loc++; 314 } 315 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr); 316 } 317 } 318 } 319 PetscFunctionReturn(0); 320 } 321 322 PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol) 323 { 324 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 325 PetscErrorCode ierr; 326 PetscInt *rp,low,high,t,ii,jj,nrow,i,rmax,N; 327 PetscInt *imax =a->imax,*ai=a->i,*ailen=a->ilen; 328 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs; 329 PetscBool roworiented=a->roworiented; 330 const PetscScalar *value = v; 331 MatScalar *ap,*aa = a->a,*bap; 332 333 PetscFunctionBegin; 334 if (col < row) { 335 if (a->ignore_ltriangular) PetscFunctionReturn(0); /* ignore lower triangular block */ 336 else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. 
Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 337 } 338 rp = aj + ai[row]; 339 ap = aa + bs2*ai[row]; 340 rmax = imax[row]; 341 nrow = ailen[row]; 342 value = v; 343 low = 0; 344 high = nrow; 345 346 while (high-low > 7) { 347 t = (low+high)/2; 348 if (rp[t] > col) high = t; 349 else low = t; 350 } 351 for (i=low; i<high; i++) { 352 if (rp[i] > col) break; 353 if (rp[i] == col) { 354 bap = ap + bs2*i; 355 if (roworiented) { 356 if (is == ADD_VALUES) { 357 for (ii=0; ii<bs; ii++) { 358 for (jj=ii; jj<bs2; jj+=bs) { 359 bap[jj] += *value++; 360 } 361 } 362 } else { 363 for (ii=0; ii<bs; ii++) { 364 for (jj=ii; jj<bs2; jj+=bs) { 365 bap[jj] = *value++; 366 } 367 } 368 } 369 } else { 370 if (is == ADD_VALUES) { 371 for (ii=0; ii<bs; ii++) { 372 for (jj=0; jj<bs; jj++) { 373 *bap++ += *value++; 374 } 375 } 376 } else { 377 for (ii=0; ii<bs; ii++) { 378 for (jj=0; jj<bs; jj++) { 379 *bap++ = *value++; 380 } 381 } 382 } 383 } 384 goto noinsert2; 385 } 386 } 387 if (nonew == 1) goto noinsert2; 388 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new block index nonzero block (%D, %D) in the matrix", orow, ocol); 389 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 390 N = nrow++ - 1; high++; 391 /* shift up all the later entries in this row */ 392 ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr); 393 ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr); 394 rp[i] = col; 395 bap = ap + bs2*i; 396 if (roworiented) { 397 for (ii=0; ii<bs; ii++) { 398 for (jj=ii; jj<bs2; jj+=bs) { 399 bap[jj] = *value++; 400 } 401 } 402 } else { 403 for (ii=0; ii<bs; ii++) { 404 for (jj=0; jj<bs; jj++) { 405 *bap++ = *value++; 406 } 407 } 408 } 409 noinsert2:; 410 ailen[row] = nrow; 411 PetscFunctionReturn(0); 412 } 413 414 /* 415 This routine is exactly duplicated in mpibaij.c 416 */ 417 PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol) 418 { 419 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 420 PetscInt *rp,low,high,t,ii,jj,nrow,i,rmax,N; 421 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 422 PetscErrorCode ierr; 423 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs; 424 PetscBool roworiented=a->roworiented; 425 const PetscScalar *value = v; 426 MatScalar *ap,*aa = a->a,*bap; 427 428 PetscFunctionBegin; 429 rp = aj + ai[row]; 430 ap = aa + bs2*ai[row]; 431 rmax = imax[row]; 432 nrow = ailen[row]; 433 low = 0; 434 high = nrow; 435 value = v; 436 while (high-low > 7) { 437 t = (low+high)/2; 438 if (rp[t] > col) high = t; 439 else low = t; 440 } 441 for (i=low; i<high; i++) { 442 if (rp[i] > col) break; 443 if (rp[i] == col) { 444 bap = ap + bs2*i; 445 if (roworiented) { 446 if (is == ADD_VALUES) { 447 for (ii=0; ii<bs; ii++) { 448 for (jj=ii; jj<bs2; jj+=bs) { 449 bap[jj] += *value++; 450 } 451 } 452 } else { 453 for (ii=0; ii<bs; ii++) { 454 for (jj=ii; jj<bs2; jj+=bs) { 455 bap[jj] = *value++; 456 } 457 } 458 } 459 } else { 460 if (is == ADD_VALUES) { 461 for (ii=0; ii<bs; ii++,value+=bs) { 462 for (jj=0; jj<bs; jj++) { 463 bap[jj] += value[jj]; 464 } 465 bap += bs; 466 } 467 } else { 468 for (ii=0; ii<bs; ii++,value+=bs) { 469 for (jj=0; jj<bs; jj++) { 470 bap[jj] = value[jj]; 471 } 472 bap += bs; 473 } 474 } 475 } 476 goto noinsert2; 477 } 478 } 479 if (nonew == 1) goto noinsert2; 480 if (nonew == -1) 
SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%D, %D) in the matrix", orow, ocol); 481 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 482 N = nrow++ - 1; high++; 483 /* shift up all the later entries in this row */ 484 ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr); 485 ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr); 486 rp[i] = col; 487 bap = ap + bs2*i; 488 if (roworiented) { 489 for (ii=0; ii<bs; ii++) { 490 for (jj=ii; jj<bs2; jj+=bs) { 491 bap[jj] = *value++; 492 } 493 } 494 } else { 495 for (ii=0; ii<bs; ii++) { 496 for (jj=0; jj<bs; jj++) { 497 *bap++ = *value++; 498 } 499 } 500 } 501 noinsert2:; 502 ailen[row] = nrow; 503 PetscFunctionReturn(0); 504 } 505 506 /* 507 This routine could be optimized by removing the need for the block copy below and passing stride information 508 to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ() 509 */ 510 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 511 { 512 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 513 const MatScalar *value; 514 MatScalar *barray =baij->barray; 515 PetscBool roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular; 516 PetscErrorCode ierr; 517 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 518 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 519 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 520 521 PetscFunctionBegin; 522 if (!barray) { 523 ierr = PetscMalloc1(bs2,&barray);CHKERRQ(ierr); 524 baij->barray = barray; 525 } 526 527 if (roworiented) { 528 stepval = (n-1)*bs; 529 } else { 530 stepval = (m-1)*bs; 531 } 532 for (i=0; i<m; i++) { 533 if (im[i] < 0) continue; 534 #if defined(PETSC_USE_DEBUG) 535 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %D max %D",im[i],baij->Mbs-1); 536 #endif 537 if (im[i] >= rstart && im[i] < rend) { 538 row = im[i] - rstart; 539 for (j=0; j<n; j++) { 540 if (im[i] > in[j]) { 541 if (ignore_ltriangular) continue; /* ignore lower triangular blocks */ 542 else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. 
Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 543 } 544 /* If NumCol = 1 then a copy is not required */ 545 if ((roworiented) && (n == 1)) { 546 barray = (MatScalar*) v + i*bs2; 547 } else if ((!roworiented) && (m == 1)) { 548 barray = (MatScalar*) v + j*bs2; 549 } else { /* Here a copy is required */ 550 if (roworiented) { 551 value = v + i*(stepval+bs)*bs + j*bs; 552 } else { 553 value = v + j*(stepval+bs)*bs + i*bs; 554 } 555 for (ii=0; ii<bs; ii++,value+=stepval) { 556 for (jj=0; jj<bs; jj++) { 557 *barray++ = *value++; 558 } 559 } 560 barray -=bs2; 561 } 562 563 if (in[j] >= cstart && in[j] < cend) { 564 col = in[j] - cstart; 565 ierr = MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr); 566 } else if (in[j] < 0) continue; 567 #if defined(PETSC_USE_DEBUG) 568 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %D max %D",in[j],baij->Nbs-1); 569 #endif 570 else { 571 if (mat->was_assembled) { 572 if (!baij->colmap) { 573 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 574 } 575 576 #if defined(PETSC_USE_DEBUG) 577 #if defined(PETSC_USE_CTABLE) 578 { PetscInt data; 579 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 580 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 581 } 582 #else 583 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 584 #endif 585 #endif 586 #if defined(PETSC_USE_CTABLE) 587 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 588 col = (col - 1)/bs; 589 #else 590 col = (baij->colmap[in[j]] - 1)/bs; 591 #endif 592 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 593 ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 594 col = in[j]; 595 } 596 } else col = in[j]; 597 ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr); 598 } 599 } 600 } else { 601 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 602 if (!baij->donotstash) { 603 if (roworiented) { 604 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 605 } else { 606 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 607 } 608 } 609 } 610 } 611 PetscFunctionReturn(0); 612 } 613 614 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 615 { 616 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 617 PetscErrorCode ierr; 618 PetscInt bs = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend; 619 PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data; 620 621 PetscFunctionBegin; 622 for (i=0; i<m; i++) { 623 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */ 624 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 625 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 626 row = idxm[i] - bsrstart; 627 for (j=0; j<n; j++) { 628 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */ 629 if (idxn[j] >= mat->cmap->N) 
SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 630 if (idxn[j] >= bscstart && idxn[j] < bscend) { 631 col = idxn[j] - bscstart; 632 ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 633 } else { 634 if (!baij->colmap) { 635 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 636 } 637 #if defined(PETSC_USE_CTABLE) 638 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 639 data--; 640 #else 641 data = baij->colmap[idxn[j]/bs]-1; 642 #endif 643 if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 644 else { 645 col = data + idxn[j]%bs; 646 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 647 } 648 } 649 } 650 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 651 } 652 PetscFunctionReturn(0); 653 } 654 655 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm) 656 { 657 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 658 PetscErrorCode ierr; 659 PetscReal sum[2],*lnorm2; 660 661 PetscFunctionBegin; 662 if (baij->size == 1) { 663 ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr); 664 } else { 665 if (type == NORM_FROBENIUS) { 666 ierr = PetscMalloc1(2,&lnorm2);CHKERRQ(ierr); 667 ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr); 668 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++; /* squar power of norm(A) */ 669 ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr); 670 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--; /* squar power of norm(B) */ 671 ierr = MPIU_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 672 *norm = PetscSqrtReal(sum[0] + 2*sum[1]); 673 ierr = PetscFree(lnorm2);CHKERRQ(ierr); 674 } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */ 675 Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data; 676 Mat_SeqBAIJ *bmat=(Mat_SeqBAIJ*)baij->B->data; 677 PetscReal *rsum,*rsum2,vabs; 678 PetscInt *jj,*garray=baij->garray,rstart=baij->rstartbs,nz; 679 PetscInt brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs; 680 MatScalar *v; 681 682 ierr = PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2);CHKERRQ(ierr); 683 ierr = PetscArrayzero(rsum,mat->cmap->N);CHKERRQ(ierr); 684 /* Amat */ 685 v = amat->a; jj = amat->j; 686 for (brow=0; brow<mbs; brow++) { 687 grow = bs*(rstart + brow); 688 nz = amat->i[brow+1] - amat->i[brow]; 689 for (bcol=0; bcol<nz; bcol++) { 690 gcol = bs*(rstart + *jj); jj++; 691 for (col=0; col<bs; col++) { 692 for (row=0; row<bs; row++) { 693 vabs = PetscAbsScalar(*v); v++; 694 rsum[gcol+col] += vabs; 695 /* non-diagonal block */ 696 if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs; 697 } 698 } 699 } 700 ierr = PetscLogFlops(nz*bs*bs);CHKERRQ(ierr); 701 } 702 /* Bmat */ 703 v = bmat->a; jj = bmat->j; 704 for (brow=0; brow<mbs; brow++) { 705 grow = bs*(rstart + brow); 706 nz = bmat->i[brow+1] - bmat->i[brow]; 707 for (bcol=0; bcol<nz; bcol++) { 708 gcol = bs*garray[*jj]; jj++; 709 for (col=0; col<bs; col++) { 710 for (row=0; row<bs; row++) { 711 vabs = PetscAbsScalar(*v); v++; 712 rsum[gcol+col] += vabs; 713 rsum[grow+row] += vabs; 714 } 715 } 716 } 717 ierr = PetscLogFlops(nz*bs*bs);CHKERRQ(ierr); 718 } 719 ierr = MPIU_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 720 *norm = 0.0; 721 for (col=0; col<mat->cmap->N; col++) { 722 if (rsum2[col] > *norm) *norm = rsum2[col]; 723 } 724 ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr); 725 } else 
SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;

  PetscFunctionBegin;
  if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  Mat_SeqSBAIJ   *a    = (Mat_SeqSBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscMPIInt    n;
  PetscBool      r1,r2,r3;
  MatScalar      *val;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!baij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the row-oriented flags to PETSC_FALSE, and restore the original flags
       after MatSetValues() */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);

    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble.
*/ 819 /* 820 if nonzero structure of submatrix B cannot change then we know that 821 no processor disassembled thus we can skip this stuff 822 */ 823 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 824 ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 825 if (mat->was_assembled && !other_disassembled) { 826 ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 827 } 828 } 829 830 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 831 ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */ 832 } 833 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 834 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 835 836 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 837 838 baij->rowvalues = 0; 839 840 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 841 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 842 PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate; 843 ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 844 } 845 PetscFunctionReturn(0); 846 } 847 848 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 849 #include <petscdraw.h> 850 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 851 { 852 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 853 PetscErrorCode ierr; 854 PetscInt bs = mat->rmap->bs; 855 PetscMPIInt rank = baij->rank; 856 PetscBool iascii,isdraw; 857 PetscViewer sviewer; 858 PetscViewerFormat format; 859 860 PetscFunctionBegin; 861 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 862 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 863 if (iascii) { 864 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 865 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 866 MatInfo info; 867 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 868 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 869 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 870 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %g\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(double)info.memory);CHKERRQ(ierr); 871 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 872 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 873 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 874 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 875 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 876 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 877 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 878 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 879 PetscFunctionReturn(0); 880 } else if (format == PETSC_VIEWER_ASCII_INFO) { 881 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 882 PetscFunctionReturn(0); 883 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 884 PetscFunctionReturn(0); 885 } 886 
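    /*
       Usage note (illustrative sketch, not part of this routine): the ASCII branches above are what
       a user sees when matrix viewing is requested through the standard runtime options, e.g.

         -mat_view ::ascii_info           selects PETSC_VIEWER_ASCII_INFO (block size summary)
         -mat_view ::ascii_info_detail    selects PETSC_VIEWER_ASCII_INFO_DETAIL (per-rank nonzero counts)

       for any program that calls MatViewFromOptions() or MatView() on an MPISBAIJ matrix.
    */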
} 887 888 if (isdraw) { 889 PetscDraw draw; 890 PetscBool isnull; 891 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 892 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 893 if (isnull) PetscFunctionReturn(0); 894 } 895 896 { 897 /* assemble the entire matrix onto first processor. */ 898 Mat A; 899 Mat_SeqSBAIJ *Aloc; 900 Mat_SeqBAIJ *Bloc; 901 PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 902 MatScalar *a; 903 const char *matname; 904 905 /* Should this be the same type as mat? */ 906 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 907 if (!rank) { 908 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 909 } else { 910 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 911 } 912 ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 913 ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr); 914 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 915 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 916 917 /* copy over the A part */ 918 Aloc = (Mat_SeqSBAIJ*)baij->A->data; 919 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 920 ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr); 921 922 for (i=0; i<mbs; i++) { 923 rvals[0] = bs*(baij->rstartbs + i); 924 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 925 for (j=ai[i]; j<ai[i+1]; j++) { 926 col = (baij->cstartbs+aj[j])*bs; 927 for (k=0; k<bs; k++) { 928 ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 929 col++; 930 a += bs; 931 } 932 } 933 } 934 /* copy over the B part */ 935 Bloc = (Mat_SeqBAIJ*)baij->B->data; 936 ai = Bloc->i; aj = Bloc->j; a = Bloc->a; 937 for (i=0; i<mbs; i++) { 938 939 rvals[0] = bs*(baij->rstartbs + i); 940 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 941 for (j=ai[i]; j<ai[i+1]; j++) { 942 col = baij->garray[aj[j]]*bs; 943 for (k=0; k<bs; k++) { 944 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 945 col++; 946 a += bs; 947 } 948 } 949 } 950 ierr = PetscFree(rvals);CHKERRQ(ierr); 951 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 952 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 953 /* 954 Everyone has to call to draw the matrix since the graphics waits are 955 synchronized across all processors that share the PetscDraw object 956 */ 957 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 958 ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr); 959 if (!rank) { 960 ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,matname);CHKERRQ(ierr); 961 ierr = MatView_SeqSBAIJ(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 962 } 963 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 964 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 965 ierr = MatDestroy(&A);CHKERRQ(ierr); 966 } 967 PetscFunctionReturn(0); 968 } 969 970 static PetscErrorCode MatView_MPISBAIJ_Binary(Mat mat,PetscViewer viewer) 971 { 972 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)mat->data; 973 Mat_SeqSBAIJ *A = (Mat_SeqSBAIJ*)a->A->data; 974 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)a->B->data; 975 PetscErrorCode ierr; 976 PetscInt i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen; 977 PetscInt *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll; 978 int fd; 979 PetscScalar *column_values; 980 FILE *file; 981 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 982 PetscInt 
message_count,flowcontrolcount; 983 984 PetscFunctionBegin; 985 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 986 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 987 nz = bs2*(A->nz + B->nz); 988 rlen = mat->rmap->n; 989 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 990 if (!rank) { 991 header[0] = MAT_FILE_CLASSID; 992 header[1] = mat->rmap->N; 993 header[2] = mat->cmap->N; 994 995 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 996 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 997 /* get largest number of rows any processor has */ 998 range = mat->rmap->range; 999 for (i=1; i<size; i++) { 1000 rlen = PetscMax(rlen,range[i+1] - range[i]); 1001 } 1002 } else { 1003 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1004 } 1005 1006 ierr = PetscMalloc1(rlen/bs,&crow_lens);CHKERRQ(ierr); 1007 /* compute lengths of each row */ 1008 for (i=0; i<a->mbs; i++) { 1009 crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1010 } 1011 /* store the row lengths to the file */ 1012 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1013 if (!rank) { 1014 MPI_Status status; 1015 ierr = PetscMalloc1(rlen,&row_lens);CHKERRQ(ierr); 1016 rlen = (range[1] - range[0])/bs; 1017 for (i=0; i<rlen; i++) { 1018 for (j=0; j<bs; j++) { 1019 row_lens[i*bs+j] = bs*crow_lens[i]; 1020 } 1021 } 1022 ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1023 for (i=1; i<size; i++) { 1024 rlen = (range[i+1] - range[i])/bs; 1025 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1026 ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1027 for (k=0; k<rlen; k++) { 1028 for (j=0; j<bs; j++) { 1029 row_lens[k*bs+j] = bs*crow_lens[k]; 1030 } 1031 } 1032 ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1033 } 1034 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1035 ierr = PetscFree(row_lens);CHKERRQ(ierr); 1036 } else { 1037 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1038 ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1039 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1040 } 1041 ierr = PetscFree(crow_lens);CHKERRQ(ierr); 1042 1043 /* load up the local column indices. Include for all rows not just one for each block row since process 0 does not have the 1044 information needed to make it for each row from a block row. 
This does require more communication but still not more than 1045 the communication needed for the nonzero values */ 1046 nzmax = nz; /* space a largest processor needs */ 1047 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1048 ierr = PetscMalloc1(nzmax,&column_indices);CHKERRQ(ierr); 1049 cnt = 0; 1050 for (i=0; i<a->mbs; i++) { 1051 pcnt = cnt; 1052 for (j=B->i[i]; j<B->i[i+1]; j++) { 1053 if ((col = garray[B->j[j]]) > cstart) break; 1054 for (l=0; l<bs; l++) { 1055 column_indices[cnt++] = bs*col+l; 1056 } 1057 } 1058 for (k=A->i[i]; k<A->i[i+1]; k++) { 1059 for (l=0; l<bs; l++) { 1060 column_indices[cnt++] = bs*(A->j[k] + cstart)+l; 1061 } 1062 } 1063 for (; j<B->i[i+1]; j++) { 1064 for (l=0; l<bs; l++) { 1065 column_indices[cnt++] = bs*garray[B->j[j]]+l; 1066 } 1067 } 1068 len = cnt - pcnt; 1069 for (k=1; k<bs; k++) { 1070 ierr = PetscArraycpy(&column_indices[cnt],&column_indices[pcnt],len);CHKERRQ(ierr); 1071 cnt += len; 1072 } 1073 } 1074 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1075 1076 /* store the columns to the file */ 1077 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1078 if (!rank) { 1079 MPI_Status status; 1080 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1081 for (i=1; i<size; i++) { 1082 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1083 ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1084 ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1085 ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1086 } 1087 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1088 } else { 1089 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1090 ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1091 ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1092 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1093 } 1094 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1095 1096 /* load up the numerical values */ 1097 ierr = PetscMalloc1(nzmax,&column_values);CHKERRQ(ierr); 1098 cnt = 0; 1099 for (i=0; i<a->mbs; i++) { 1100 rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]); 1101 for (j=B->i[i]; j<B->i[i+1]; j++) { 1102 if (garray[B->j[j]] > cstart) break; 1103 for (l=0; l<bs; l++) { 1104 for (ll=0; ll<bs; ll++) { 1105 column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll]; 1106 } 1107 } 1108 cnt += bs; 1109 } 1110 for (k=A->i[i]; k<A->i[i+1]; k++) { 1111 for (l=0; l<bs; l++) { 1112 for (ll=0; ll<bs; ll++) { 1113 column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll]; 1114 } 1115 } 1116 cnt += bs; 1117 } 1118 for (; j<B->i[i+1]; j++) { 1119 for (l=0; l<bs; l++) { 1120 for (ll=0; ll<bs; ll++) { 1121 column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll]; 1122 } 1123 } 1124 cnt += bs; 1125 } 1126 cnt += (bs-1)*rlen; 1127 } 1128 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1129 1130 /* store the column values to the file */ 1131 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1132 if (!rank) { 1133 MPI_Status status; 1134 ierr = 
PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1135 for (i=1; i<size; i++) { 1136 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1137 ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1138 ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1139 ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1140 } 1141 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1142 } else { 1143 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1144 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1145 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1146 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1147 } 1148 ierr = PetscFree(column_values);CHKERRQ(ierr); 1149 1150 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1151 if (file) { 1152 fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs); 1153 } 1154 PetscFunctionReturn(0); 1155 } 1156 1157 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer) 1158 { 1159 PetscErrorCode ierr; 1160 PetscBool iascii,isdraw,issocket,isbinary; 1161 1162 PetscFunctionBegin; 1163 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1164 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1165 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1166 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1167 if (iascii || isdraw || issocket) { 1168 ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1169 } else if (isbinary) { 1170 ierr = MatView_MPISBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1171 } 1172 PetscFunctionReturn(0); 1173 } 1174 1175 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat) 1176 { 1177 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1178 PetscErrorCode ierr; 1179 1180 PetscFunctionBegin; 1181 #if defined(PETSC_USE_LOG) 1182 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N); 1183 #endif 1184 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1185 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 1186 ierr = MatDestroy(&baij->A);CHKERRQ(ierr); 1187 ierr = MatDestroy(&baij->B);CHKERRQ(ierr); 1188 #if defined(PETSC_USE_CTABLE) 1189 ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr); 1190 #else 1191 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 1192 #endif 1193 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 1194 ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr); 1195 ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr); 1196 ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr); 1197 ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr); 1198 ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr); 1199 ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr); 1200 ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr); 1201 ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr); 1202 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 1203 ierr = PetscFree(baij->barray);CHKERRQ(ierr); 1204 ierr = PetscFree(baij->hd);CHKERRQ(ierr); 1205 ierr = VecDestroy(&baij->diag);CHKERRQ(ierr); 1206 ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr); 1207 ierr = 
VecDestroy(&baij->xx1);CHKERRQ(ierr); 1208 #if defined(PETSC_USE_REAL_MAT_SINGLE) 1209 ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr); 1210 #endif 1211 ierr = PetscFree(baij->in_loc);CHKERRQ(ierr); 1212 ierr = PetscFree(baij->v_loc);CHKERRQ(ierr); 1213 ierr = PetscFree(baij->rangebs);CHKERRQ(ierr); 1214 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1215 1216 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1217 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1218 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1219 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1220 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1221 #if defined(PETSC_HAVE_ELEMENTAL) 1222 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_elemental_C",NULL);CHKERRQ(ierr); 1223 #endif 1224 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpiaij_C",NULL);CHKERRQ(ierr); 1225 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpibaij_C",NULL);CHKERRQ(ierr); 1226 PetscFunctionReturn(0); 1227 } 1228 1229 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy) 1230 { 1231 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1232 PetscErrorCode ierr; 1233 PetscInt nt,mbs=a->mbs,bs=A->rmap->bs; 1234 PetscScalar *from; 1235 const PetscScalar *x; 1236 1237 PetscFunctionBegin; 1238 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1239 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1240 1241 /* diagonal part */ 1242 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 1243 ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr); 1244 1245 /* subdiagonal part */ 1246 ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 1247 1248 /* copy x into the vec slvec0 */ 1249 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 1250 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1251 1252 ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr); 1253 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 1254 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1255 1256 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1257 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1258 /* supperdiagonal part */ 1259 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 1260 PetscFunctionReturn(0); 1261 } 1262 1263 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy) 1264 { 1265 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1266 PetscErrorCode ierr; 1267 PetscInt nt,mbs=a->mbs,bs=A->rmap->bs; 1268 PetscScalar *from; 1269 const PetscScalar *x; 1270 1271 PetscFunctionBegin; 1272 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1273 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1274 1275 /* diagonal part */ 1276 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 1277 ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr); 1278 1279 /* subdiagonal part */ 1280 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 1281 1282 /* copy x into the vec slvec0 */ 1283 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 1284 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1285 1286 ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr); 1287 ierr = 
VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");

  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");

  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* do diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  /* do superdiagonal part */
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  /* do subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscInt          mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar       *from,zero=0.0;
  const PetscScalar *x;

  PetscFunctionBegin;
  /*
  PetscSynchronizedPrintf(PetscObjectComm((PetscObject)A)," MatMultAdd is called ...\n");
  PetscSynchronizedFlush(PetscObjectComm((PetscObject)A),PETSC_STDOUT);
  */
  /* diagonal part */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
  ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* do diagonal part */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* do superdiagonal part */
  ierr =
VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1369 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1370 1371 /* do subdiagonal part */ 1372 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1373 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1374 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1375 PetscFunctionReturn(0); 1376 } 1377 1378 /* 1379 This only works correctly for square matrices where the subblock A->A is the 1380 diagonal block 1381 */ 1382 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v) 1383 { 1384 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1385 PetscErrorCode ierr; 1386 1387 PetscFunctionBegin; 1388 /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */ 1389 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1390 PetscFunctionReturn(0); 1391 } 1392 1393 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa) 1394 { 1395 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1396 PetscErrorCode ierr; 1397 1398 PetscFunctionBegin; 1399 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1400 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1401 PetscFunctionReturn(0); 1402 } 1403 1404 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1405 { 1406 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 1407 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1408 PetscErrorCode ierr; 1409 PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1410 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend; 1411 PetscInt *cmap,*idx_p,cstart = mat->rstartbs; 1412 1413 PetscFunctionBegin; 1414 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1415 mat->getrowactive = PETSC_TRUE; 1416 1417 if (!mat->rowvalues && (idx || v)) { 1418 /* 1419 allocate enough space to hold information from the longest row. 
1420 */ 1421 Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data; 1422 Mat_SeqBAIJ *Ba = (Mat_SeqBAIJ*)mat->B->data; 1423 PetscInt max = 1,mbs = mat->mbs,tmp; 1424 for (i=0; i<mbs; i++) { 1425 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */ 1426 if (max < tmp) max = tmp; 1427 } 1428 ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr); 1429 } 1430 1431 if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows"); 1432 lrow = row - brstart; /* local row index */ 1433 1434 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1435 if (!v) {pvA = 0; pvB = 0;} 1436 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1437 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1438 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1439 nztot = nzA + nzB; 1440 1441 cmap = mat->garray; 1442 if (v || idx) { 1443 if (nztot) { 1444 /* Sort by increasing column numbers, assuming A and B already sorted */ 1445 PetscInt imark = -1; 1446 if (v) { 1447 *v = v_p = mat->rowvalues; 1448 for (i=0; i<nzB; i++) { 1449 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1450 else break; 1451 } 1452 imark = i; 1453 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1454 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1455 } 1456 if (idx) { 1457 *idx = idx_p = mat->rowindices; 1458 if (imark > -1) { 1459 for (i=0; i<imark; i++) { 1460 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1461 } 1462 } else { 1463 for (i=0; i<nzB; i++) { 1464 if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1465 else break; 1466 } 1467 imark = i; 1468 } 1469 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1470 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1471 } 1472 } else { 1473 if (idx) *idx = 0; 1474 if (v) *v = 0; 1475 } 1476 } 1477 *nz = nztot; 1478 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1479 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1480 PetscFunctionReturn(0); 1481 } 1482 1483 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1484 { 1485 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1486 1487 PetscFunctionBegin; 1488 if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1489 baij->getrowactive = PETSC_FALSE; 1490 PetscFunctionReturn(0); 1491 } 1492 1493 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A) 1494 { 1495 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1496 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1497 1498 PetscFunctionBegin; 1499 aA->getrow_utriangular = PETSC_TRUE; 1500 PetscFunctionReturn(0); 1501 } 1502 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A) 1503 { 1504 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1505 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1506 1507 PetscFunctionBegin; 1508 aA->getrow_utriangular = PETSC_FALSE; 1509 PetscFunctionReturn(0); 1510 } 1511 1512 PetscErrorCode MatRealPart_MPISBAIJ(Mat A) 1513 { 1514 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1515 PetscErrorCode ierr; 1516 1517 PetscFunctionBegin; 1518 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1519 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1520 PetscFunctionReturn(0); 1521 } 1522 1523 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A) 1524 { 1525 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1526 PetscErrorCode ierr; 1527 1528 PetscFunctionBegin; 1529 ierr = 
MatImaginaryPart(a->A);CHKERRQ(ierr); 1530 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1531 PetscFunctionReturn(0); 1532 } 1533 1534 /* Check if isrow is a subset of iscol_local, called by MatCreateSubMatrix_MPISBAIJ() 1535 Input: isrow - distributed(parallel), 1536 iscol_local - locally owned (seq) 1537 */ 1538 PetscErrorCode ISEqual_private(IS isrow,IS iscol_local,PetscBool *flg) 1539 { 1540 PetscErrorCode ierr; 1541 PetscInt sz1,sz2,*a1,*a2,i,j,k,nmatch; 1542 const PetscInt *ptr1,*ptr2; 1543 1544 PetscFunctionBegin; 1545 ierr = ISGetLocalSize(isrow,&sz1);CHKERRQ(ierr); 1546 ierr = ISGetLocalSize(iscol_local,&sz2);CHKERRQ(ierr); 1547 if (sz1 > sz2) { 1548 *flg = PETSC_FALSE; 1549 PetscFunctionReturn(0); 1550 } 1551 1552 ierr = ISGetIndices(isrow,&ptr1);CHKERRQ(ierr); 1553 ierr = ISGetIndices(iscol_local,&ptr2);CHKERRQ(ierr); 1554 1555 ierr = PetscMalloc1(sz1,&a1);CHKERRQ(ierr); 1556 ierr = PetscMalloc1(sz2,&a2);CHKERRQ(ierr); 1557 ierr = PetscArraycpy(a1,ptr1,sz1);CHKERRQ(ierr); 1558 ierr = PetscArraycpy(a2,ptr2,sz2);CHKERRQ(ierr); 1559 ierr = PetscSortInt(sz1,a1);CHKERRQ(ierr); 1560 ierr = PetscSortInt(sz2,a2);CHKERRQ(ierr); 1561 1562 nmatch=0; 1563 k = 0; 1564 for (i=0; i<sz1; i++){ 1565 for (j=k; j<sz2; j++){ 1566 if (a1[i] == a2[j]) { 1567 k = j; nmatch++; 1568 break; 1569 } 1570 } 1571 } 1572 ierr = ISRestoreIndices(isrow,&ptr1);CHKERRQ(ierr); 1573 ierr = ISRestoreIndices(iscol_local,&ptr2);CHKERRQ(ierr); 1574 ierr = PetscFree(a1);CHKERRQ(ierr); 1575 ierr = PetscFree(a2);CHKERRQ(ierr); 1576 if (nmatch < sz1) { 1577 *flg = PETSC_FALSE; 1578 } else { 1579 *flg = PETSC_TRUE; 1580 } 1581 PetscFunctionReturn(0); 1582 } 1583 1584 PetscErrorCode MatCreateSubMatrix_MPISBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 1585 { 1586 PetscErrorCode ierr; 1587 IS iscol_local; 1588 PetscInt csize; 1589 PetscBool isequal; 1590 1591 PetscFunctionBegin; 1592 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1593 if (call == MAT_REUSE_MATRIX) { 1594 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 1595 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 1596 } else { 1597 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 1598 ierr = ISEqual_private(isrow,iscol_local,&isequal);CHKERRQ(ierr); 1599 if (!isequal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isrow"); 1600 } 1601 1602 /* now call MatCreateSubMatrix_MPIBAIJ() */ 1603 ierr = MatCreateSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 1604 if (call == MAT_INITIAL_MATRIX) { 1605 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 1606 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 1607 } 1608 PetscFunctionReturn(0); 1609 } 1610 1611 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A) 1612 { 1613 Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data; 1614 PetscErrorCode ierr; 1615 1616 PetscFunctionBegin; 1617 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1618 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1619 PetscFunctionReturn(0); 1620 } 1621 1622 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1623 { 1624 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data; 1625 Mat A = a->A,B = a->B; 1626 PetscErrorCode ierr; 1627 PetscReal isend[5],irecv[5]; 1628 1629 PetscFunctionBegin; 1630 info->block_size = (PetscReal)matin->rmap->bs; 1631 1632 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1633 
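  /* the statistics of the diagonal (A) and off-diagonal (B) parts are accumulated into a single array so that
     the MAT_GLOBAL_MAX and MAT_GLOBAL_SUM cases below each need only one MPIU_Allreduce() */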
1634 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1635 isend[3] = info->memory; isend[4] = info->mallocs; 1636 1637 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1638 1639 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1640 isend[3] += info->memory; isend[4] += info->mallocs; 1641 if (flag == MAT_LOCAL) { 1642 info->nz_used = isend[0]; 1643 info->nz_allocated = isend[1]; 1644 info->nz_unneeded = isend[2]; 1645 info->memory = isend[3]; 1646 info->mallocs = isend[4]; 1647 } else if (flag == MAT_GLOBAL_MAX) { 1648 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1649 1650 info->nz_used = irecv[0]; 1651 info->nz_allocated = irecv[1]; 1652 info->nz_unneeded = irecv[2]; 1653 info->memory = irecv[3]; 1654 info->mallocs = irecv[4]; 1655 } else if (flag == MAT_GLOBAL_SUM) { 1656 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1657 1658 info->nz_used = irecv[0]; 1659 info->nz_allocated = irecv[1]; 1660 info->nz_unneeded = irecv[2]; 1661 info->memory = irecv[3]; 1662 info->mallocs = irecv[4]; 1663 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1664 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1665 info->fill_ratio_needed = 0; 1666 info->factor_mallocs = 0; 1667 PetscFunctionReturn(0); 1668 } 1669 1670 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg) 1671 { 1672 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1673 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1674 PetscErrorCode ierr; 1675 1676 PetscFunctionBegin; 1677 switch (op) { 1678 case MAT_NEW_NONZERO_LOCATIONS: 1679 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1680 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1681 case MAT_KEEP_NONZERO_PATTERN: 1682 case MAT_SUBMAT_SINGLEIS: 1683 case MAT_NEW_NONZERO_LOCATION_ERR: 1684 MatCheckPreallocated(A,1); 1685 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1686 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1687 break; 1688 case MAT_ROW_ORIENTED: 1689 MatCheckPreallocated(A,1); 1690 a->roworiented = flg; 1691 1692 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1693 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1694 break; 1695 case MAT_NEW_DIAGONALS: 1696 case MAT_SORTED_FULL: 1697 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1698 break; 1699 case MAT_IGNORE_OFF_PROC_ENTRIES: 1700 a->donotstash = flg; 1701 break; 1702 case MAT_USE_HASH_TABLE: 1703 a->ht_flag = flg; 1704 break; 1705 case MAT_HERMITIAN: 1706 MatCheckPreallocated(A,1); 1707 if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first"); 1708 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1709 #if defined(PETSC_USE_COMPLEX) 1710 A->ops->mult = MatMult_MPISBAIJ_Hermitian; 1711 #endif 1712 break; 1713 case MAT_SPD: 1714 A->spd_set = PETSC_TRUE; 1715 A->spd = flg; 1716 if (flg) { 1717 A->symmetric = PETSC_TRUE; 1718 A->structurally_symmetric = PETSC_TRUE; 1719 A->symmetric_set = PETSC_TRUE; 1720 A->structurally_symmetric_set = PETSC_TRUE; 1721 } 1722 break; 1723 case MAT_SYMMETRIC: 1724 MatCheckPreallocated(A,1); 1725 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1726 break; 1727 case MAT_STRUCTURALLY_SYMMETRIC: 1728 MatCheckPreallocated(A,1); 1729 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1730 break; 1731 case MAT_SYMMETRY_ETERNAL: 1732 if (!flg) 
SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric"); 1733 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1734 break; 1735 case MAT_IGNORE_LOWER_TRIANGULAR: 1736 aA->ignore_ltriangular = flg; 1737 break; 1738 case MAT_ERROR_LOWER_TRIANGULAR: 1739 aA->ignore_ltriangular = flg; 1740 break; 1741 case MAT_GETROW_UPPERTRIANGULAR: 1742 aA->getrow_utriangular = flg; 1743 break; 1744 default: 1745 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1746 } 1747 PetscFunctionReturn(0); 1748 } 1749 1750 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B) 1751 { 1752 PetscErrorCode ierr; 1753 1754 PetscFunctionBegin; 1755 if (reuse == MAT_INITIAL_MATRIX) { 1756 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 1757 } else if (reuse == MAT_REUSE_MATRIX) { 1758 ierr = MatCopy(A,*B,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1759 } 1760 PetscFunctionReturn(0); 1761 } 1762 1763 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr) 1764 { 1765 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1766 Mat a = baij->A, b=baij->B; 1767 PetscErrorCode ierr; 1768 PetscInt nv,m,n; 1769 PetscBool flg; 1770 1771 PetscFunctionBegin; 1772 if (ll != rr) { 1773 ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr); 1774 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n"); 1775 } 1776 if (!ll) PetscFunctionReturn(0); 1777 1778 ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr); 1779 if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n); 1780 1781 ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr); 1782 if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size"); 1783 1784 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1785 1786 /* left diagonalscale the off-diagonal part */ 1787 ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr); 1788 1789 /* scale the diagonal part */ 1790 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1791 1792 /* right diagonalscale the off-diagonal part */ 1793 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1794 ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr); 1795 PetscFunctionReturn(0); 1796 } 1797 1798 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A) 1799 { 1800 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1801 PetscErrorCode ierr; 1802 1803 PetscFunctionBegin; 1804 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1805 PetscFunctionReturn(0); 1806 } 1807 1808 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*); 1809 1810 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool *flag) 1811 { 1812 Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data; 1813 Mat a,b,c,d; 1814 PetscBool flg; 1815 PetscErrorCode ierr; 1816 1817 PetscFunctionBegin; 1818 a = matA->A; b = matA->B; 1819 c = matB->A; d = matB->B; 1820 1821 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1822 if (flg) { 1823 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1824 } 1825 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1826 PetscFunctionReturn(0); 1827 } 1828 1829 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str) 1830 { 1831 PetscErrorCode ierr; 1832 PetscBool isbaij; 1833 1834 PetscFunctionBegin; 1835 ierr = PetscObjectTypeCompareAny((PetscObject)B,&isbaij,MATSEQSBAIJ,MATMPISBAIJ,"");CHKERRQ(ierr); 
1836 if (!isbaij) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"Not for matrix type %s",((PetscObject)B)->type_name); 1837 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1838 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1839 ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr); 1840 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1841 ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr); 1842 } else { 1843 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1844 Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)B->data; 1845 1846 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1847 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1848 } 1849 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 1850 PetscFunctionReturn(0); 1851 } 1852 1853 PetscErrorCode MatSetUp_MPISBAIJ(Mat A) 1854 { 1855 PetscErrorCode ierr; 1856 1857 PetscFunctionBegin; 1858 ierr = MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1859 PetscFunctionReturn(0); 1860 } 1861 1862 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1863 { 1864 PetscErrorCode ierr; 1865 Mat_MPISBAIJ *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data; 1866 PetscBLASInt bnz,one=1; 1867 Mat_SeqSBAIJ *xa,*ya; 1868 Mat_SeqBAIJ *xb,*yb; 1869 1870 PetscFunctionBegin; 1871 if (str == SAME_NONZERO_PATTERN) { 1872 PetscScalar alpha = a; 1873 xa = (Mat_SeqSBAIJ*)xx->A->data; 1874 ya = (Mat_SeqSBAIJ*)yy->A->data; 1875 ierr = PetscBLASIntCast(xa->nz,&bnz);CHKERRQ(ierr); 1876 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one)); 1877 xb = (Mat_SeqBAIJ*)xx->B->data; 1878 yb = (Mat_SeqBAIJ*)yy->B->data; 1879 ierr = PetscBLASIntCast(xb->nz,&bnz);CHKERRQ(ierr); 1880 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one)); 1881 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 1882 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1883 ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 1884 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1885 ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr); 1886 } else { 1887 Mat B; 1888 PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs; 1889 if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size"); 1890 ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr); 1891 ierr = MatGetRowUpperTriangular(Y);CHKERRQ(ierr); 1892 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 1893 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 1894 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 1895 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 1896 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 1897 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 1898 ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr); 1899 ierr = MatAXPYGetPreallocation_SeqSBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 1900 ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 1901 ierr = MatMPISBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 1902 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 1903 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 1904 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 1905 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 1906 ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr); 1907 ierr = 
MatRestoreRowUpperTriangular(Y);CHKERRQ(ierr); 1908 } 1909 PetscFunctionReturn(0); 1910 } 1911 1912 PetscErrorCode MatCreateSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[]) 1913 { 1914 PetscErrorCode ierr; 1915 PetscInt i; 1916 PetscBool flg; 1917 1918 PetscFunctionBegin; 1919 ierr = MatCreateSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr); /* B[] are sbaij matrices */ 1920 for (i=0; i<n; i++) { 1921 ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr); 1922 if (!flg) { 1923 ierr = MatSeqSBAIJZeroOps_Private(*B[i]);CHKERRQ(ierr); 1924 } 1925 } 1926 PetscFunctionReturn(0); 1927 } 1928 1929 PetscErrorCode MatShift_MPISBAIJ(Mat Y,PetscScalar a) 1930 { 1931 PetscErrorCode ierr; 1932 Mat_MPISBAIJ *maij = (Mat_MPISBAIJ*)Y->data; 1933 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ*)maij->A->data; 1934 1935 PetscFunctionBegin; 1936 if (!Y->preallocated) { 1937 ierr = MatMPISBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);CHKERRQ(ierr); 1938 } else if (!aij->nz) { 1939 PetscInt nonew = aij->nonew; 1940 ierr = MatSeqSBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL);CHKERRQ(ierr); 1941 aij->nonew = nonew; 1942 } 1943 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 1944 PetscFunctionReturn(0); 1945 } 1946 1947 PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A,PetscBool *missing,PetscInt *d) 1948 { 1949 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1950 PetscErrorCode ierr; 1951 1952 PetscFunctionBegin; 1953 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 1954 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 1955 if (d) { 1956 PetscInt rstart; 1957 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 1958 *d += rstart/A->rmap->bs; 1959 1960 } 1961 PetscFunctionReturn(0); 1962 } 1963 1964 PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a) 1965 { 1966 PetscFunctionBegin; 1967 *a = ((Mat_MPISBAIJ*)A->data)->A; 1968 PetscFunctionReturn(0); 1969 } 1970 1971 /* -------------------------------------------------------------------*/ 1972 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ, 1973 MatGetRow_MPISBAIJ, 1974 MatRestoreRow_MPISBAIJ, 1975 MatMult_MPISBAIJ, 1976 /* 4*/ MatMultAdd_MPISBAIJ, 1977 MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */ 1978 MatMultAdd_MPISBAIJ, 1979 0, 1980 0, 1981 0, 1982 /* 10*/ 0, 1983 0, 1984 0, 1985 MatSOR_MPISBAIJ, 1986 MatTranspose_MPISBAIJ, 1987 /* 15*/ MatGetInfo_MPISBAIJ, 1988 MatEqual_MPISBAIJ, 1989 MatGetDiagonal_MPISBAIJ, 1990 MatDiagonalScale_MPISBAIJ, 1991 MatNorm_MPISBAIJ, 1992 /* 20*/ MatAssemblyBegin_MPISBAIJ, 1993 MatAssemblyEnd_MPISBAIJ, 1994 MatSetOption_MPISBAIJ, 1995 MatZeroEntries_MPISBAIJ, 1996 /* 24*/ 0, 1997 0, 1998 0, 1999 0, 2000 0, 2001 /* 29*/ MatSetUp_MPISBAIJ, 2002 0, 2003 0, 2004 MatGetDiagonalBlock_MPISBAIJ, 2005 0, 2006 /* 34*/ MatDuplicate_MPISBAIJ, 2007 0, 2008 0, 2009 0, 2010 0, 2011 /* 39*/ MatAXPY_MPISBAIJ, 2012 MatCreateSubMatrices_MPISBAIJ, 2013 MatIncreaseOverlap_MPISBAIJ, 2014 MatGetValues_MPISBAIJ, 2015 MatCopy_MPISBAIJ, 2016 /* 44*/ 0, 2017 MatScale_MPISBAIJ, 2018 MatShift_MPISBAIJ, 2019 0, 2020 0, 2021 /* 49*/ 0, 2022 0, 2023 0, 2024 0, 2025 0, 2026 /* 54*/ 0, 2027 0, 2028 MatSetUnfactored_MPISBAIJ, 2029 0, 2030 MatSetValuesBlocked_MPISBAIJ, 2031 /* 59*/ MatCreateSubMatrix_MPISBAIJ, 2032 0, 2033 0, 2034 0, 2035 0, 2036 /* 64*/ 0, 2037 0, 2038 0, 2039 0, 2040 0, 2041 /* 69*/ MatGetRowMaxAbs_MPISBAIJ, 2042 0, 2043 0, 2044 0, 2045 0, 2046 /* 74*/ 0, 2047 0, 2048 0, 2049 0, 2050 0, 2051 /* 79*/ 0, 2052 
0, 2053 0, 2054 0, 2055 MatLoad_MPISBAIJ, 2056 /* 84*/ 0, 2057 0, 2058 0, 2059 0, 2060 0, 2061 /* 89*/ 0, 2062 0, 2063 0, 2064 0, 2065 0, 2066 /* 94*/ 0, 2067 0, 2068 0, 2069 0, 2070 0, 2071 /* 99*/ 0, 2072 0, 2073 0, 2074 0, 2075 0, 2076 /*104*/ 0, 2077 MatRealPart_MPISBAIJ, 2078 MatImaginaryPart_MPISBAIJ, 2079 MatGetRowUpperTriangular_MPISBAIJ, 2080 MatRestoreRowUpperTriangular_MPISBAIJ, 2081 /*109*/ 0, 2082 0, 2083 0, 2084 0, 2085 MatMissingDiagonal_MPISBAIJ, 2086 /*114*/ 0, 2087 0, 2088 0, 2089 0, 2090 0, 2091 /*119*/ 0, 2092 0, 2093 0, 2094 0, 2095 0, 2096 /*124*/ 0, 2097 0, 2098 0, 2099 0, 2100 0, 2101 /*129*/ 0, 2102 0, 2103 0, 2104 0, 2105 0, 2106 /*134*/ 0, 2107 0, 2108 0, 2109 0, 2110 0, 2111 /*139*/ MatSetBlockSizes_Default, 2112 0, 2113 0, 2114 0, 2115 0, 2116 /*144*/MatCreateMPIMatConcatenateSeqMat_MPISBAIJ 2117 }; 2118 2119 PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz) 2120 { 2121 Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)B->data; 2122 PetscErrorCode ierr; 2123 PetscInt i,mbs,Mbs; 2124 PetscMPIInt size; 2125 2126 PetscFunctionBegin; 2127 ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 2128 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2129 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2130 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2131 if (B->rmap->N > B->cmap->N) SETERRQ2(PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"MPISBAIJ matrix cannot have more rows %D than columns %D",B->rmap->N,B->cmap->N); 2132 if (B->rmap->n > B->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"MPISBAIJ matrix cannot have more local rows %D than columns %D",B->rmap->n,B->cmap->n); 2133 2134 mbs = B->rmap->n/bs; 2135 Mbs = B->rmap->N/bs; 2136 if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->N,bs); 2137 2138 B->rmap->bs = bs; 2139 b->bs2 = bs*bs; 2140 b->mbs = mbs; 2141 b->Mbs = Mbs; 2142 b->nbs = B->cmap->n/bs; 2143 b->Nbs = B->cmap->N/bs; 2144 2145 for (i=0; i<=b->size; i++) { 2146 b->rangebs[i] = B->rmap->range[i]/bs; 2147 } 2148 b->rstartbs = B->rmap->rstart/bs; 2149 b->rendbs = B->rmap->rend/bs; 2150 2151 b->cstartbs = B->cmap->rstart/bs; 2152 b->cendbs = B->cmap->rend/bs; 2153 2154 #if defined(PETSC_USE_CTABLE) 2155 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2156 #else 2157 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2158 #endif 2159 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2160 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2161 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2162 ierr = VecDestroy(&b->slvec0);CHKERRQ(ierr); 2163 ierr = VecDestroy(&b->slvec0b);CHKERRQ(ierr); 2164 ierr = VecDestroy(&b->slvec1);CHKERRQ(ierr); 2165 ierr = VecDestroy(&b->slvec1a);CHKERRQ(ierr); 2166 ierr = VecDestroy(&b->slvec1b);CHKERRQ(ierr); 2167 ierr = VecScatterDestroy(&b->sMvctx);CHKERRQ(ierr); 2168 2169 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2170 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 2171 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2172 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2173 ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? 
B->cmap->N : 0);CHKERRQ(ierr); 2174 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 2175 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2176 2177 if (!B->preallocated) { 2178 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2179 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2180 ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr); 2181 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2182 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr); 2183 } 2184 2185 ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2186 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 2187 2188 B->preallocated = PETSC_TRUE; 2189 B->was_assembled = PETSC_FALSE; 2190 B->assembled = PETSC_FALSE; 2191 PetscFunctionReturn(0); 2192 } 2193 2194 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2195 { 2196 PetscInt m,rstart,cend; 2197 PetscInt i,j,d,nz,bd, nz_max=0,*d_nnz=0,*o_nnz=0; 2198 const PetscInt *JJ =0; 2199 PetscScalar *values=0; 2200 PetscBool roworiented = ((Mat_MPISBAIJ*)B->data)->roworiented; 2201 PetscErrorCode ierr; 2202 2203 PetscFunctionBegin; 2204 if (bs < 1) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 2205 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2206 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2207 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2208 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2209 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2210 m = B->rmap->n/bs; 2211 rstart = B->rmap->rstart/bs; 2212 cend = B->cmap->rend/bs; 2213 2214 if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 2215 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 2216 for (i=0; i<m; i++) { 2217 nz = ii[i+1] - ii[i]; 2218 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 2219 /* count the ones on the diagonal and above, split into diagonal and off diagonal portions. */ 2220 JJ = jj + ii[i]; 2221 bd = 0; 2222 for (j=0; j<nz; j++) { 2223 if (*JJ >= i + rstart) break; 2224 JJ++; 2225 bd++; 2226 } 2227 d = 0; 2228 for (; j<nz; j++) { 2229 if (*JJ++ >= cend) break; 2230 d++; 2231 } 2232 d_nnz[i] = d; 2233 o_nnz[i] = nz - d - bd; 2234 nz = nz - bd; 2235 nz_max = PetscMax(nz_max,nz); 2236 } 2237 ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2238 ierr = MatSetOption(B,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 2239 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 2240 2241 values = (PetscScalar*)V; 2242 if (!values) { 2243 ierr = PetscCalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr); 2244 } 2245 for (i=0; i<m; i++) { 2246 PetscInt row = i + rstart; 2247 PetscInt ncols = ii[i+1] - ii[i]; 2248 const PetscInt *icols = jj + ii[i]; 2249 if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */ 2250 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2251 ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2252 } else { /* block ordering does not match so we can only insert one block at a time. 
*/ 2253 PetscInt j; 2254 for (j=0; j<ncols; j++) { 2255 const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0); 2256 ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr); 2257 } 2258 } 2259 } 2260 2261 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2262 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2263 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2264 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2265 PetscFunctionReturn(0); 2266 } 2267 2268 /*MC 2269 MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices, 2270 based on block compressed sparse row format. Only the upper triangular portion of the "diagonal" portion of 2271 the matrix is stored. 2272 2273 For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you 2274 can call MatSetOption(Mat, MAT_HERMITIAN); 2275 2276 Options Database Keys: 2277 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions() 2278 2279 Notes: 2280 The number of rows in the matrix must be less than or equal to the number of columns. Similarly the number of rows in the 2281 diagonal portion of the matrix of each process has to less than or equal the number of columns. 2282 2283 Level: beginner 2284 2285 .seealso: MatCreateMPISBAIJ(), MATSEQSBAIJ, MatType 2286 M*/ 2287 2288 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B) 2289 { 2290 Mat_MPISBAIJ *b; 2291 PetscErrorCode ierr; 2292 PetscBool flg = PETSC_FALSE; 2293 2294 PetscFunctionBegin; 2295 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 2296 B->data = (void*)b; 2297 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2298 2299 B->ops->destroy = MatDestroy_MPISBAIJ; 2300 B->ops->view = MatView_MPISBAIJ; 2301 B->assembled = PETSC_FALSE; 2302 B->insertmode = NOT_SET_VALUES; 2303 2304 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 2305 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr); 2306 2307 /* build local table of row and column ownerships */ 2308 ierr = PetscMalloc1(b->size+2,&b->rangebs);CHKERRQ(ierr); 2309 2310 /* build cache for off array entries formed */ 2311 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 2312 2313 b->donotstash = PETSC_FALSE; 2314 b->colmap = NULL; 2315 b->garray = NULL; 2316 b->roworiented = PETSC_TRUE; 2317 2318 /* stuff used in block assembly */ 2319 b->barray = 0; 2320 2321 /* stuff used for matrix vector multiply */ 2322 b->lvec = 0; 2323 b->Mvctx = 0; 2324 b->slvec0 = 0; 2325 b->slvec0b = 0; 2326 b->slvec1 = 0; 2327 b->slvec1a = 0; 2328 b->slvec1b = 0; 2329 b->sMvctx = 0; 2330 2331 /* stuff for MatGetRow() */ 2332 b->rowindices = 0; 2333 b->rowvalues = 0; 2334 b->getrowactive = PETSC_FALSE; 2335 2336 /* hash table stuff */ 2337 b->ht = 0; 2338 b->hd = 0; 2339 b->ht_size = 0; 2340 b->ht_flag = PETSC_FALSE; 2341 b->ht_fact = 0; 2342 b->ht_total_ct = 0; 2343 b->ht_insert_ct = 0; 2344 2345 /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */ 2346 b->ijonly = PETSC_FALSE; 2347 2348 b->in_loc = 0; 2349 b->v_loc = 0; 2350 b->n_loc = 0; 2351 2352 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ);CHKERRQ(ierr); 2353 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr); 2354 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr); 2355 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr); 2356 #if defined(PETSC_HAVE_ELEMENTAL) 2357 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_elemental_C",MatConvert_MPISBAIJ_Elemental);CHKERRQ(ierr); 2358 #endif 2359 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpiaij_C",MatConvert_MPISBAIJ_XAIJ);CHKERRQ(ierr); 2360 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpibaij_C",MatConvert_MPISBAIJ_XAIJ);CHKERRQ(ierr); 2361 2362 B->symmetric = PETSC_TRUE; 2363 B->structurally_symmetric = PETSC_TRUE; 2364 B->symmetric_set = PETSC_TRUE; 2365 B->structurally_symmetric_set = PETSC_TRUE; 2366 B->symmetric_eternal = PETSC_TRUE; 2367 2368 B->hermitian = PETSC_FALSE; 2369 B->hermitian_set = PETSC_FALSE; 2370 2371 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr); 2372 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr); 2373 ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",flg,&flg,NULL);CHKERRQ(ierr); 2374 if (flg) { 2375 PetscReal fact = 1.39; 2376 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 2377 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr); 2378 if (fact <= 1.0) fact = 1.39; 2379 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 2380 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 2381 } 2382 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2383 PetscFunctionReturn(0); 2384 } 2385 2386 /*MC 2387 MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices. 2388 2389 This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator, 2390 and MATMPISBAIJ otherwise. 2391 2392 Options Database Keys: 2393 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions() 2394 2395 Level: beginner 2396 2397 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ 2398 M*/ 2399 2400 /*@C 2401 MatMPISBAIJSetPreallocation - For good matrix assembly performance 2402 the user should preallocate the matrix storage by setting the parameters 2403 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2404 performance can be increased by more than a factor of 50. 2405 2406 Collective on Mat 2407 2408 Input Parameters: 2409 + B - the matrix 2410 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 2411 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 2412 . d_nz - number of block nonzeros per block row in diagonal portion of local 2413 submatrix (same for all local rows) 2414 . d_nnz - array containing the number of block nonzeros in the various block rows 2415 in the upper triangular and diagonal part of the diagonal portion of the local submatrix 2416 (possibly different for each block row) or NULL. If you plan to factor the matrix you must leave room 2417 for the diagonal entry and set a value even if it is zero. 2418 . 
o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2419 submatrix (same for all local rows). 2420 - o_nnz - array containing the number of block nonzeros in the various block rows of the 2421 off-diagonal portion of the local submatrix that is right of the diagonal 2422 (possibly different for each block row) or NULL. 2423 2424 2425 Options Database Keys: 2426 + -mat_no_unroll - uses code that does not unroll the loops in the 2427 block calculations (much slower) 2428 - -mat_block_size - size of the blocks to use 2429 2430 Notes: 2431 2432 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2433 then it must be used on all processors that share the object for that argument. 2434 2435 If the *_nnz parameter is given then the *_nz parameter is ignored. 2436 2437 Storage Information: 2438 For a square global matrix we define each processor's diagonal portion 2439 to be its local rows and the corresponding columns (a square submatrix); 2440 each processor's off-diagonal portion encompasses the remainder of the 2441 local matrix (a rectangular submatrix). 2442 2443 The user can specify preallocated storage for the diagonal part of 2444 the local submatrix with either d_nz or d_nnz (not both). Set 2445 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 2446 memory allocation. Likewise, specify preallocated storage for the 2447 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2448 2449 You can call MatGetInfo() to get information on how effective the preallocation was; 2450 for example the fields mallocs, nz_allocated, nz_used, nz_unneeded. 2451 You can also run with the option -info and look for messages with the string 2452 malloc in them to see if additional memory allocation was needed. 2453 2454 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2455 the figure below we depict these three local rows and all columns (0-11). 2456 2457 .vb 2458 0 1 2 3 4 5 6 7 8 9 10 11 2459 -------------------------- 2460 row 3 |. . . d d d o o o o o o 2461 row 4 |. . . d d d o o o o o o 2462 row 5 |. . . d d d o o o o o o 2463 -------------------------- 2464 .ve 2465 2466 Thus, any entries in the d locations are stored in the d (diagonal) 2467 submatrix, and any entries in the o locations are stored in the 2468 o (off-diagonal) submatrix. Note that the d matrix is stored in 2469 MATSEQSBAIJ format and the o submatrix in MATSEQBAIJ format. 2470 2471 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 2472 plus the diagonal part of the d matrix, 2473 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2474 2475 In general, for PDE problems in which most nonzeros are near the diagonal, 2476 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2477 or you will get TERRIBLE performance; see the users' manual chapter on 2478 matrices.
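   As an illustrative sketch only (the sizes and block counts below are made up, they are not taken from any
   particular application), a matrix with bs=2 and three block rows per process could be preallocated as
.vb
     Mat      A;
     PetscInt d_nnz[3] = {3,2,1},o_nnz[3] = {1,1,0};

     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetSizes(A,6,6,PETSC_DETERMINE,PETSC_DETERMINE);
     MatSetType(A,MATMPISBAIJ);
     MatMPISBAIJSetPreallocation(A,2,0,d_nnz,0,o_nnz);
.ve
   after which entries are inserted with MatSetValues() or MatSetValuesBlocked() and the matrix is assembled
   with MatAssemblyBegin()/MatAssemblyEnd(). Here d_nnz counts only the upper triangular (including diagonal)
   blocks of each block row of the diagonal portion, as described above.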
2479 2480 Level: intermediate 2481 2482 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership() 2483 @*/ 2484 PetscErrorCode MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2485 { 2486 PetscErrorCode ierr; 2487 2488 PetscFunctionBegin; 2489 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2490 PetscValidType(B,1); 2491 PetscValidLogicalCollectiveInt(B,bs,2); 2492 ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 2493 PetscFunctionReturn(0); 2494 } 2495 2496 /*@C 2497 MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format 2498 (block compressed row). For good matrix assembly performance 2499 the user should preallocate the matrix storage by setting the parameters 2500 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2501 performance can be increased by more than a factor of 50. 2502 2503 Collective 2504 2505 Input Parameters: 2506 + comm - MPI communicator 2507 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 2508 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 2509 . m - number of local rows (or PETSC_DECIDE to have it calculated if M is given) 2510 This value should be the same as the local size used in creating the 2511 y vector for the matrix-vector product y = Ax. 2512 . n - number of local columns (or PETSC_DECIDE to have it calculated if N is given) 2513 This value should be the same as the local size used in creating the 2514 x vector for the matrix-vector product y = Ax. 2515 . M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given) 2516 . N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given) 2517 . d_nz - number of block nonzeros per block row in diagonal portion of local 2518 submatrix (same for all local rows) 2519 . d_nnz - array containing the number of block nonzeros in the various block rows 2520 in the upper triangular portion of the diagonal portion of the local submatrix 2521 (possibly different for each block row) or NULL. 2522 If you plan to factor the matrix you must leave room for the diagonal entry and 2523 set its value even if it is zero. 2524 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2525 submatrix (same for all local rows). 2526 - o_nnz - array containing the number of block nonzeros in the various block rows of the 2527 off-diagonal portion of the local submatrix (possibly different for 2528 each block row) or NULL. 2529 2530 Output Parameter: 2531 . A - the matrix 2532 2533 Options Database Keys: 2534 + -mat_no_unroll - uses code that does not unroll the loops in the 2535 block calculations (much slower) 2536 . -mat_block_size - size of the blocks to use 2537 - -mat_mpi - use the parallel matrix data structures even on one processor 2538 (defaults to using SeqSBAIJ format on one processor) 2539 2540 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 2541 MatXXXXSetPreallocation() paradigm instead of this routine directly. 2542 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 2543 2544 Notes: 2545 The number of rows and columns must be divisible by blocksize.
2546 This matrix type does not support complex Hermitian operations. 2547 2548 The user MUST specify either the local or global matrix dimensions 2549 (possibly both). 2550 2551 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2552 then it must be used on all processors that share the object for that argument. 2553 2554 If the *_nnz parameter is given then the *_nz parameter is ignored. 2555 2556 Storage Information: 2557 For a square global matrix we define each processor's diagonal portion 2558 to be its local rows and the corresponding columns (a square submatrix); 2559 each processor's off-diagonal portion encompasses the remainder of the 2560 local matrix (a rectangular submatrix). 2561 2562 The user can specify preallocated storage for the diagonal part of 2563 the local submatrix with either d_nz or d_nnz (not both). Set 2564 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 2565 memory allocation. Likewise, specify preallocated storage for the 2566 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2567 2568 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2569 the figure below we depict these three local rows and all columns (0-11). 2570 2571 .vb 2572 0 1 2 3 4 5 6 7 8 9 10 11 2573 -------------------------- 2574 row 3 |. . . d d d o o o o o o 2575 row 4 |. . . d d d o o o o o o 2576 row 5 |. . . d d d o o o o o o 2577 -------------------------- 2578 .ve 2579 2580 Thus, any entries in the d locations are stored in the d (diagonal) 2581 submatrix, and any entries in the o locations are stored in the 2582 o (off-diagonal) submatrix. Note that the d matrix is stored in 2583 MATSEQSBAIJ format and the o submatrix in MATSEQBAIJ format. 2584 2585 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 2586 plus the diagonal part of the d matrix, 2587 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2588 In general, for PDE problems in which most nonzeros are near the diagonal, 2589 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2590 or you will get TERRIBLE performance; see the users' manual chapter on 2591 matrices.
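   As an illustrative sketch (the arguments here are just the parameters described above, with no particular
   values implied), the recommended MatCreate()/MatSetType()/MatXXXXSetPreallocation() paradigm is
.vb
     Mat A;
     MatCreate(comm,&A);
     MatSetSizes(A,m,n,M,N);
     MatSetType(A,MATSBAIJ);
     MatSetFromOptions(A);
     MatMPISBAIJSetPreallocation(A,bs,d_nz,d_nnz,o_nz,o_nnz);
     MatSeqSBAIJSetPreallocation(A,bs,d_nz,d_nnz);
.ve
   The preallocation call that does not match the actual type of A is ignored, so both may be made
   unconditionally; this is essentially what MatCreateSBAIJ() itself does below.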
2592 2593 Level: intermediate 2594 2595 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ() 2596 @*/ 2597 2598 PetscErrorCode MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2599 { 2600 PetscErrorCode ierr; 2601 PetscMPIInt size; 2602 2603 PetscFunctionBegin; 2604 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2605 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2606 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2607 if (size > 1) { 2608 ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr); 2609 ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2610 } else { 2611 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 2612 ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2613 } 2614 PetscFunctionReturn(0); 2615 } 2616 2617 2618 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2619 { 2620 Mat mat; 2621 Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data; 2622 PetscErrorCode ierr; 2623 PetscInt len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs; 2624 PetscScalar *array; 2625 2626 PetscFunctionBegin; 2627 *newmat = 0; 2628 2629 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2630 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2631 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2632 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2633 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2634 2635 mat->factortype = matin->factortype; 2636 mat->preallocated = PETSC_TRUE; 2637 mat->assembled = PETSC_TRUE; 2638 mat->insertmode = NOT_SET_VALUES; 2639 2640 a = (Mat_MPISBAIJ*)mat->data; 2641 a->bs2 = oldmat->bs2; 2642 a->mbs = oldmat->mbs; 2643 a->nbs = oldmat->nbs; 2644 a->Mbs = oldmat->Mbs; 2645 a->Nbs = oldmat->Nbs; 2646 2647 a->size = oldmat->size; 2648 a->rank = oldmat->rank; 2649 a->donotstash = oldmat->donotstash; 2650 a->roworiented = oldmat->roworiented; 2651 a->rowindices = 0; 2652 a->rowvalues = 0; 2653 a->getrowactive = PETSC_FALSE; 2654 a->barray = 0; 2655 a->rstartbs = oldmat->rstartbs; 2656 a->rendbs = oldmat->rendbs; 2657 a->cstartbs = oldmat->cstartbs; 2658 a->cendbs = oldmat->cendbs; 2659 2660 /* hash table stuff */ 2661 a->ht = 0; 2662 a->hd = 0; 2663 a->ht_size = 0; 2664 a->ht_flag = oldmat->ht_flag; 2665 a->ht_fact = oldmat->ht_fact; 2666 a->ht_total_ct = 0; 2667 a->ht_insert_ct = 0; 2668 2669 ierr = PetscArraycpy(a->rangebs,oldmat->rangebs,a->size+2);CHKERRQ(ierr); 2670 if (oldmat->colmap) { 2671 #if defined(PETSC_USE_CTABLE) 2672 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2673 #else 2674 ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr); 2675 ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2676 ierr = PetscArraycpy(a->colmap,oldmat->colmap,a->Nbs);CHKERRQ(ierr); 2677 #endif 2678 } else a->colmap = 0; 2679 2680 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2681 ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr); 2682 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2683 ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); 2684 } else a->garray = 0; 2685 2686 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 2687 ierr = 
VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2688 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2689 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2690 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2691 2692 ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr); 2693 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr); 2694 ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr); 2695 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr); 2696 2697 ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr); 2698 ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr); 2699 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr); 2700 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr); 2701 ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr); 2702 ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr); 2703 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr); 2704 ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr); 2705 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr); 2706 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr); 2707 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b);CHKERRQ(ierr); 2708 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a);CHKERRQ(ierr); 2709 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b);CHKERRQ(ierr); 2710 2711 /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */ 2712 ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr); 2713 a->sMvctx = oldmat->sMvctx; 2714 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx);CHKERRQ(ierr); 2715 2716 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2717 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2718 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2719 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2720 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2721 *newmat = mat; 2722 PetscFunctionReturn(0); 2723 } 2724 2725 PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer) 2726 { 2727 PetscErrorCode ierr; 2728 PetscInt i,nz,j,rstart,rend; 2729 PetscScalar *vals,*buf; 2730 MPI_Comm comm; 2731 MPI_Status status; 2732 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,mmbs; 2733 PetscInt header[4],*rowlengths = 0,M,N,m,*cols,*locrowlens; 2734 PetscInt *procsnz = 0,jj,*mycols,*ibuf; 2735 PetscInt bs = newmat->rmap->bs,Mbs,mbs,extra_rows; 2736 PetscInt *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2737 PetscInt dcount,kmax,k,nzcount,tmp; 2738 int fd; 2739 PetscBool isbinary; 2740 2741 PetscFunctionBegin; 2742 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 2743 if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)newmat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newmat)->type_name); 2744 2745 /* force binary viewer to load .info file if it has not yet done so */ 2746 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2747 ierr = 
PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 2748 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr); 2749 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 2750 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2751 if (bs < 0) bs = 1; 2752 2753 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2754 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2755 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2756 if (!rank) { 2757 ierr = PetscBinaryRead(fd,(char*)header,4,NULL,PETSC_INT);CHKERRQ(ierr); 2758 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2759 if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newmat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ"); 2760 } 2761 2762 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2763 M = header[1]; 2764 N = header[2]; 2765 2766 /* If global sizes are set, check if they are consistent with that given in the file */ 2767 if (newmat->rmap->N >= 0 && newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newmat->rmap->N,M); 2768 if (newmat->cmap->N >= 0 && newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newmat->cmap->N,N); 2769 2770 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 2771 2772 /* 2773 This code adds extra rows to make sure the number of rows is 2774 divisible by the blocksize 2775 */ 2776 Mbs = M/bs; 2777 extra_rows = bs - M + bs*(Mbs); 2778 if (extra_rows == bs) extra_rows = 0; 2779 else Mbs++; 2780 if (extra_rows &&!rank) { 2781 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 2782 } 2783 2784 /* determine ownership of all rows */ 2785 if (newmat->rmap->n < 0) { /* PETSC_DECIDE */ 2786 mbs = Mbs/size + ((Mbs % size) > rank); 2787 m = mbs*bs; 2788 } else { /* User Set */ 2789 m = newmat->rmap->n; 2790 mbs = m/bs; 2791 } 2792 ierr = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr); 2793 ierr = PetscMPIIntCast(mbs,&mmbs);CHKERRQ(ierr); 2794 ierr = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2795 rowners[0] = 0; 2796 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2797 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2798 rstart = rowners[rank]; 2799 rend = rowners[rank+1]; 2800 2801 /* distribute row lengths to all processors */ 2802 ierr = PetscMalloc1((rend-rstart)*bs,&locrowlens);CHKERRQ(ierr); 2803 if (!rank) { 2804 ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr); 2805 ierr = PetscBinaryRead(fd,rowlengths,M,NULL,PETSC_INT);CHKERRQ(ierr); 2806 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2807 ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr); 2808 for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2809 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2810 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2811 } else { 2812 ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2813 } 2814 2815 if (!rank) { /* procs[0] */ 2816 /* calculate the number of nonzeros on each processor */ 2817 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 2818 for 
(i=0; i<size; i++) { 2819 for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 2820 procsnz[i] += rowlengths[j]; 2821 } 2822 } 2823 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2824 2825 /* determine max buffer needed and allocate it */ 2826 maxnz = 0; 2827 for (i=0; i<size; i++) { 2828 maxnz = PetscMax(maxnz,procsnz[i]); 2829 } 2830 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 2831 2832 /* read in my part of the matrix column indices */ 2833 nz = procsnz[0]; 2834 ierr = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr); 2835 mycols = ibuf; 2836 if (size == 1) nz -= extra_rows; 2837 ierr = PetscBinaryRead(fd,mycols,nz,NULL,PETSC_INT);CHKERRQ(ierr); 2838 if (size == 1) { 2839 for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i; 2840 } 2841 2842 /* read in every ones (except the last) and ship off */ 2843 for (i=1; i<size-1; i++) { 2844 nz = procsnz[i]; 2845 ierr = PetscBinaryRead(fd,cols,nz,NULL,PETSC_INT);CHKERRQ(ierr); 2846 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2847 } 2848 /* read in the stuff for the last proc */ 2849 if (size != 1) { 2850 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2851 ierr = PetscBinaryRead(fd,cols,nz,NULL,PETSC_INT);CHKERRQ(ierr); 2852 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2853 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 2854 } 2855 ierr = PetscFree(cols);CHKERRQ(ierr); 2856 } else { /* procs[i], i>0 */ 2857 /* determine buffer space needed for message */ 2858 nz = 0; 2859 for (i=0; i<m; i++) nz += locrowlens[i]; 2860 ierr = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr); 2861 mycols = ibuf; 2862 /* receive message of column indices*/ 2863 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2864 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2865 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2866 } 2867 2868 /* loop over local rows, determining number of off diagonal entries */ 2869 ierr = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr); 2870 ierr = PetscCalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr); 2871 rowcount = 0; 2872 nzcount = 0; 2873 for (i=0; i<mbs; i++) { 2874 dcount = 0; 2875 odcount = 0; 2876 for (j=0; j<bs; j++) { 2877 kmax = locrowlens[rowcount]; 2878 for (k=0; k<kmax; k++) { 2879 tmp = mycols[nzcount++]/bs; /* block col. 
index */ 2880 if (!mask[tmp]) { 2881 mask[tmp] = 1; 2882 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */ 2883 else masked1[dcount++] = tmp; /* entry in diag portion */ 2884 } 2885 } 2886 rowcount++; 2887 } 2888 2889 dlens[i] = dcount; /* d_nzz[i] */ 2890 odlens[i] = odcount; /* o_nzz[i] */ 2891 2892 /* zero out the mask elements we set */ 2893 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2894 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2895 } 2896 ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 2897 ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr); 2898 ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 2899 2900 if (!rank) { 2901 ierr = PetscMalloc1(maxnz,&buf);CHKERRQ(ierr); 2902 /* read in my part of the matrix numerical values */ 2903 nz = procsnz[0]; 2904 vals = buf; 2905 mycols = ibuf; 2906 if (size == 1) nz -= extra_rows; 2907 ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr); 2908 if (size == 1) { 2909 for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0; 2910 } 2911 2912 /* insert into matrix */ 2913 jj = rstart*bs; 2914 for (i=0; i<m; i++) { 2915 ierr = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2916 mycols += locrowlens[i]; 2917 vals += locrowlens[i]; 2918 jj++; 2919 } 2920 2921 /* read in other processors (except the last one) and ship out */ 2922 for (i=1; i<size-1; i++) { 2923 nz = procsnz[i]; 2924 vals = buf; 2925 ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr); 2926 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 2927 } 2928 /* the last proc */ 2929 if (size != 1) { 2930 nz = procsnz[i] - extra_rows; 2931 vals = buf; 2932 ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr); 2933 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2934 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 2935 } 2936 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2937 2938 } else { 2939 /* receive numeric values */ 2940 ierr = PetscMalloc1(nz,&buf);CHKERRQ(ierr); 2941 2942 /* receive message of values*/ 2943 vals = buf; 2944 mycols = ibuf; 2945 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr); 2946 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2947 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2948 2949 /* insert into matrix */ 2950 jj = rstart*bs; 2951 for (i=0; i<m; i++) { 2952 ierr = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2953 mycols += locrowlens[i]; 2954 vals += locrowlens[i]; 2955 jj++; 2956 } 2957 } 2958 2959 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2960 ierr = PetscFree(buf);CHKERRQ(ierr); 2961 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2962 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 2963 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 2964 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 2965 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2966 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2967 PetscFunctionReturn(0); 2968 } 2969 2970 /*XXXXX@ 2971 MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2972 2973 Input Parameters: 2974 . mat - the matrix 2975 . 
fact - factor 2976 2977 Not Collective on Mat, each process can have a different hash factor 2978 2979 Level: advanced 2980 2981 Notes: 2982 This can also be set by the command line option: -mat_use_hash_table fact 2983 2984 .seealso: MatSetOption() 2985 @XXXXX*/ 2986 2987 2988 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[]) 2989 { 2990 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 2991 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(a->B)->data; 2992 PetscReal atmp; 2993 PetscReal *work,*svalues,*rvalues; 2994 PetscErrorCode ierr; 2995 PetscInt i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol; 2996 PetscMPIInt rank,size; 2997 PetscInt *rowners_bs,dest,count,source; 2998 PetscScalar *va; 2999 MatScalar *ba; 3000 MPI_Status stat; 3001 3002 PetscFunctionBegin; 3003 if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov"); 3004 ierr = MatGetRowMaxAbs(a->A,v,NULL);CHKERRQ(ierr); 3005 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 3006 3007 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 3008 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 3009 3010 bs = A->rmap->bs; 3011 mbs = a->mbs; 3012 Mbs = a->Mbs; 3013 ba = b->a; 3014 bi = b->i; 3015 bj = b->j; 3016 3017 /* find ownerships */ 3018 rowners_bs = A->rmap->range; 3019 3020 /* each proc creates an array to be distributed */ 3021 ierr = PetscCalloc1(bs*Mbs,&work);CHKERRQ(ierr); 3022 3023 /* row_max for B */ 3024 if (rank != size-1) { 3025 for (i=0; i<mbs; i++) { 3026 ncols = bi[1] - bi[0]; bi++; 3027 brow = bs*i; 3028 for (j=0; j<ncols; j++) { 3029 bcol = bs*(*bj); 3030 for (kcol=0; kcol<bs; kcol++) { 3031 col = bcol + kcol; /* local col index */ 3032 col += rowners_bs[rank+1]; /* global col index */ 3033 for (krow=0; krow<bs; krow++) { 3034 atmp = PetscAbsScalar(*ba); ba++; 3035 row = brow + krow; /* local row index */ 3036 if (PetscRealPart(va[row]) < atmp) va[row] = atmp; 3037 if (work[col] < atmp) work[col] = atmp; 3038 } 3039 } 3040 bj++; 3041 } 3042 } 3043 3044 /* send values to its owners */ 3045 for (dest=rank+1; dest<size; dest++) { 3046 svalues = work + rowners_bs[dest]; 3047 count = rowners_bs[dest+1]-rowners_bs[dest]; 3048 ierr = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 3049 } 3050 } 3051 3052 /* receive values */ 3053 if (rank) { 3054 rvalues = work; 3055 count = rowners_bs[rank+1]-rowners_bs[rank]; 3056 for (source=0; source<rank; source++) { 3057 ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat);CHKERRQ(ierr); 3058 /* process values */ 3059 for (i=0; i<count; i++) { 3060 if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i]; 3061 } 3062 } 3063 } 3064 3065 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 3066 ierr = PetscFree(work);CHKERRQ(ierr); 3067 PetscFunctionReturn(0); 3068 } 3069 3070 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 3071 { 3072 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 3073 PetscErrorCode ierr; 3074 PetscInt mbs=mat->mbs,bs=matin->rmap->bs; 3075 PetscScalar *x,*ptr,*from; 3076 Vec bb1; 3077 const PetscScalar *b; 3078 3079 PetscFunctionBegin; 3080 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 3081 if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 3082 3083 if (flag == 
  if (flag == SOR_APPLY_UPPER) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
    while (its--) {

      /* lower triangular part: slvec0b = - B^T*xx */
      ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);

      /* copy xx into slvec0a */
      ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
      ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
      ierr = PetscArraycpy(ptr,x,bs*mbs);CHKERRQ(ierr);
      ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);

      ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);

      /* copy bb into slvec1a */
      ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
      ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = PetscArraycpy(ptr,b,bs*mbs);CHKERRQ(ierr);
      ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);

      /* set slvec1b = 0 */
      ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);

      ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
      ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* upper triangular part: bb1 = bb1 - B*x */
      ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);

      /* local diagonal sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
    ierr = VecDestroy(&bb1);CHKERRQ(ierr);
  } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
  } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
  } else if (flag & SOR_EISENSTAT) {
    Vec               xx1;
    PetscBool         hasop;
    const PetscScalar *diag;
    PetscScalar       *sl,scale = (omega - 2.0)/omega;
    PetscInt          i,n;

    if (!mat->xx1) {
      ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr);
      ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr);
    }
    xx1 = mat->xx1;
    bb1 = mat->bb1;

    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);

    if (!mat->diag) {
      /* this is wrong for same matrix with new nonzero values */
      ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
      ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
    }
    ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);

    if (hasop) {
      ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
      ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
    } else {
      /*
        These two lines are replaced by code that may be a bit faster for a good compiler
        ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr);
        ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
      */
      ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr);
      ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr);
      ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
      ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr);
      if (omega == 1.0) {
        for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i];
        ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr);
      } else {
        for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i];
        ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr);
      }
      ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr);
      ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr);
      ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
    }

    /* multiply off-diagonal portion of matrix */
    ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
    ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
    ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr);
    ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
    ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr);
    ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr);
    ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
    ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr);

    /* local sweep */
    ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
    ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
  PetscFunctionReturn(0);
}

PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            lvec1,bb1;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
  if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* lower diagonal part: bb1 = bb - B^T*xx */
      ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
      ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);

      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
      ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

      /* upper diagonal part: bb1 = bb1 - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);

      ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

      /* diagonal sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
    ierr = VecDestroy(&lvec1);CHKERRQ(ierr);
    ierr = VecDestroy(&bb1);CHKERRQ(ierr);
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
  PetscFunctionReturn(0);
}

/*@
   MatCreateMPISBAIJWithArrays - creates an MPI SBAIJ matrix using arrays that contain the local rows
   in standard CSR format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  bs - the block size, only a block size of 1 is supported
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
       calculated if N is given). For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that block row of the matrix
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
   The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
   thus you CANNOT change the matrix entries by changing the values of a[] after you have
   called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

   The i and j indices are 0 based, and the i indices index into the local j array.

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr);
  ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
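
/*
   Example usage of MatCreateMPISBAIJWithArrays() (a minimal sketch, not part of this file;
   the two-rows-per-process layout, the values, and the surrounding comm/ierr context are
   made-up assumptions). Each process passes only the upper triangle of its rows, here just
   the diagonal entries, with bs = 1:

     Mat         A;
     PetscMPIInt rank;
     PetscInt    ii[3] = {0,1,2};
     PetscInt    jj[2];
     PetscScalar aa[2] = {2.0,2.0};

     ierr  = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
     jj[0] = 2*rank;
     jj[1] = 2*rank + 1;
     ierr  = MatCreateMPISBAIJWithArrays(comm,1,2,2,PETSC_DETERMINE,PETSC_DETERMINE,ii,jj,aa,&A);CHKERRQ(ierr);
     ierr  = MatDestroy(&A);CHKERRQ(ierr);
*/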

/*@C
   MatMPISBAIJSetPreallocationCSR - Creates a sparse parallel matrix in SBAIJ format using the given nonzero structure and (optional) numerical values

   Collective

   Input Parameters:
+  B - the matrix
.  bs - the block size
.  i - the indices into j for the start of each local row (starts with zero)
.  j - the column indices for each local row (starts with zero); these must be sorted for each row
-  v - optional values in the matrix

   Level: advanced

   Notes:
   Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries
   and usually the numerical values as well.

   Any entries below the diagonal are ignored.

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ
@*/
PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[],const PetscScalar v[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
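
/*
   Example usage of MatMPISBAIJSetPreallocationCSR() (a minimal sketch, not part of this file;
   it reuses the same made-up two-rows-per-process CSR pattern as the example above and assumes
   comm/ierr from the surrounding context). Passing v = NULL only sets the nonzero structure;
   values can be inserted later with MatSetValues():

     Mat         B;
     PetscMPIInt rank;
     PetscInt    ii[3] = {0,1,2};
     PetscInt    jj[2];

     ierr  = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
     jj[0] = 2*rank;
     jj[1] = 2*rank + 1;
     ierr  = MatCreate(comm,&B);CHKERRQ(ierr);
     ierr  = MatSetSizes(B,2,2,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
     ierr  = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr);
     ierr  = MatMPISBAIJSetPreallocationCSR(B,1,ii,jj,NULL);CHKERRQ(ierr);
*/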

PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;
  PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
  PetscInt       *indx;
  PetscScalar    *values;

  PetscFunctionBegin;
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inmat->data;
    PetscInt     *dnz,*onz,mbs,Nbs,nbs;
    PetscInt     *bindx,rmax=a->rmax,j;
    PetscMPIInt  rank,size;

    ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
    mbs = m/bs; Nbs = N/cbs;
    if (n == PETSC_DECIDE) {
      ierr = PetscSplitOwnershipBlock(comm,cbs,&n,&N);CHKERRQ(ierr);
    }
    nbs = n/cbs;

    ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr);
    ierr = MatPreallocateInitialize(comm,mbs,nbs,dnz,onz);CHKERRQ(ierr); /* inline function, output __end and __rstart are used below */

    ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
    ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
    if (rank == size-1) {
      /* Check sum(nbs) = Nbs */
      if (__end != Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local block columns %D != global block columns %D",__end,Nbs);
    }

    rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateInitialize */
    ierr   = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=0; i<mbs; i++) {
      ierr = MatGetRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */
      nnz  = nnz/bs;
      for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
      ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr);
      ierr = MatRestoreRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr);
    }
    ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscFree(bindx);CHKERRQ(ierr);

    ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
    ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
    ierr = MatSetType(*outmat,MATSBAIJ);CHKERRQ(ierr);
    ierr = MatSeqSBAIJSetPreallocation(*outmat,bs,0,dnz);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr);
    ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  }

  /* numeric phase */
  ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);

  ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ierr = MatGetRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
    Ii   = i + rstart;
    ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}