1 2 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/ 3 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 4 #include <../src/mat/impls/sbaij/seq/sbaij.h> 5 #include <petscblaslapack.h> 6 7 #if defined(PETSC_HAVE_ELEMENTAL) 8 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 9 #endif 10 11 /* This could be moved to matimpl.h */ 12 static PetscErrorCode MatPreallocateWithMats_Private(Mat B, PetscInt nm, Mat X[], PetscBool symm[], PetscBool fill) 13 { 14 Mat preallocator; 15 PetscInt r,rstart,rend; 16 PetscInt bs,i,m,n,M,N; 17 PetscBool cong = PETSC_TRUE; 18 PetscErrorCode ierr; 19 20 PetscFunctionBegin; 21 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 22 PetscValidLogicalCollectiveInt(B,nm,2); 23 for (i = 0; i < nm; i++) { 24 PetscValidHeaderSpecific(X[i],MAT_CLASSID,3); 25 ierr = PetscLayoutCompare(B->rmap,X[i]->rmap,&cong);CHKERRQ(ierr); 26 if (!cong) SETERRQ(PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"Not for different layouts"); 27 } 28 PetscValidLogicalCollectiveBool(B,fill,5); 29 ierr = MatGetBlockSize(B,&bs);CHKERRQ(ierr); 30 ierr = MatGetSize(B,&M,&N);CHKERRQ(ierr); 31 ierr = MatGetLocalSize(B,&m,&n);CHKERRQ(ierr); 32 ierr = MatCreate(PetscObjectComm((PetscObject)B),&preallocator);CHKERRQ(ierr); 33 ierr = MatSetType(preallocator,MATPREALLOCATOR);CHKERRQ(ierr); 34 ierr = MatSetBlockSize(preallocator,bs);CHKERRQ(ierr); 35 ierr = MatSetSizes(preallocator,m,n,M,N);CHKERRQ(ierr); 36 ierr = MatSetUp(preallocator);CHKERRQ(ierr); 37 ierr = MatGetOwnershipRange(preallocator,&rstart,&rend);CHKERRQ(ierr); 38 for (r = rstart; r < rend; ++r) { 39 PetscInt ncols; 40 const PetscInt *row; 41 const PetscScalar *vals; 42 43 for (i = 0; i < nm; i++) { 44 ierr = MatGetRow(X[i],r,&ncols,&row,&vals);CHKERRQ(ierr); 45 ierr = MatSetValues(preallocator,1,&r,ncols,row,vals,INSERT_VALUES);CHKERRQ(ierr); 46 if (symm && symm[i]) { 47 ierr = MatSetValues(preallocator,ncols,row,1,&r,vals,INSERT_VALUES);CHKERRQ(ierr); 48 } 49 ierr = MatRestoreRow(X[i],r,&ncols,&row,&vals);CHKERRQ(ierr); 50 } 51 } 52 ierr = MatAssemblyBegin(preallocator,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 53 ierr = MatAssemblyEnd(preallocator,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 54 ierr = MatPreallocatorPreallocate(preallocator,fill,B);CHKERRQ(ierr); 55 ierr = MatDestroy(&preallocator);CHKERRQ(ierr); 56 PetscFunctionReturn(0); 57 } 58 59 static PetscErrorCode MatConvert_MPISBAIJ_XAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 60 { 61 Mat B; 62 PetscErrorCode ierr; 63 PetscInt r; 64 65 PetscFunctionBegin; 66 if (reuse != MAT_REUSE_MATRIX) { 67 PetscBool symm = PETSC_TRUE; 68 PetscInt bs; 69 70 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 71 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 72 ierr = MatSetType(B,newtype);CHKERRQ(ierr); 73 ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr); 74 ierr = MatSetBlockSize(B,bs);CHKERRQ(ierr); 75 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 76 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 77 ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr); 78 ierr = MatPreallocateWithMats_Private(B,1,&A,&symm,PETSC_TRUE);CHKERRQ(ierr); 79 ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr); 80 } else B = *newmat; 81 82 ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr); 83 for (r = A->rmap->rstart; r < A->rmap->rend; r++) { 84 PetscInt ncols; 85 const PetscInt *row; 86 const PetscScalar *vals; 87 88 ierr = MatGetRow(A,r,&ncols,&row,&vals);CHKERRQ(ierr); 89 ierr = 
MatSetValues(B,1,&r,ncols,row,vals,INSERT_VALUES);CHKERRQ(ierr); 90 ierr = MatSetValues(B,ncols,row,1,&r,vals,INSERT_VALUES);CHKERRQ(ierr); 91 ierr = MatRestoreRow(A,r,&ncols,&row,&vals);CHKERRQ(ierr); 92 } 93 ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr); 94 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 95 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 96 97 if (reuse == MAT_INPLACE_MATRIX) { 98 ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr); 99 } else { 100 *newmat = B; 101 } 102 PetscFunctionReturn(0); 103 } 104 105 PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat) 106 { 107 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ*)mat->data; 108 PetscErrorCode ierr; 109 110 PetscFunctionBegin; 111 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 112 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 113 PetscFunctionReturn(0); 114 } 115 116 PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat) 117 { 118 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ*)mat->data; 119 PetscErrorCode ierr; 120 121 PetscFunctionBegin; 122 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 123 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 124 PetscFunctionReturn(0); 125 } 126 127 #define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,orow,ocol) \ 128 { \ 129 brow = row/bs; \ 130 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 131 rmax = aimax[brow]; nrow = ailen[brow]; \ 132 bcol = col/bs; \ 133 ridx = row % bs; cidx = col % bs; \ 134 low = 0; high = nrow; \ 135 while (high-low > 3) { \ 136 t = (low+high)/2; \ 137 if (rp[t] > bcol) high = t; \ 138 else low = t; \ 139 } \ 140 for (_i=low; _i<high; _i++) { \ 141 if (rp[_i] > bcol) break; \ 142 if (rp[_i] == bcol) { \ 143 bap = ap + bs2*_i + bs*cidx + ridx; \ 144 if (addv == ADD_VALUES) *bap += value; \ 145 else *bap = value; \ 146 goto a_noinsert; \ 147 } \ 148 } \ 149 if (a->nonew == 1) goto a_noinsert; \ 150 if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 151 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 152 N = nrow++ - 1; \ 153 /* shift up all the later entries in this row */ \ 154 ierr = PetscArraymove(rp+_i+1,rp+_i,N-_i+1);CHKERRQ(ierr); \ 155 ierr = PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1));CHKERRQ(ierr); \ 156 ierr = PetscArrayzero(ap+bs2*_i,bs2);CHKERRQ(ierr); \ 157 rp[_i] = bcol; \ 158 ap[bs2*_i + bs*cidx + ridx] = value; \ 159 A->nonzerostate++;\ 160 a_noinsert:; \ 161 ailen[brow] = nrow; \ 162 } 163 164 #define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,orow,ocol) \ 165 { \ 166 brow = row/bs; \ 167 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 168 rmax = bimax[brow]; nrow = bilen[brow]; \ 169 bcol = col/bs; \ 170 ridx = row % bs; cidx = col % bs; \ 171 low = 0; high = nrow; \ 172 while (high-low > 3) { \ 173 t = (low+high)/2; \ 174 if (rp[t] > bcol) high = t; \ 175 else low = t; \ 176 } \ 177 for (_i=low; _i<high; _i++) { \ 178 if (rp[_i] > bcol) break; \ 179 if (rp[_i] == bcol) { \ 180 bap = ap + bs2*_i + bs*cidx + ridx; \ 181 if (addv == ADD_VALUES) *bap += value; \ 182 else *bap = value; \ 183 goto b_noinsert; \ 184 } \ 185 } \ 186 if (b->nonew == 1) goto b_noinsert; \ 187 if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 188 MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 189 N = nrow++ - 1; \ 190 /* shift up all the later entries 
in this row */ \
    ierr = PetscArraymove(rp+_i+1,rp+_i,N-_i+1);CHKERRQ(ierr); \
    ierr = PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1));CHKERRQ(ierr); \
    ierr = PetscArrayzero(ap+bs2*_i,bs2);CHKERRQ(ierr); \
    rp[_i]                      = bcol; \
    ap[bs2*_i + bs*cidx + ridx] = value; \
    B->nonzerostate++;\
    b_noinsert:; \
    bilen[brow] = nrow; \
  }

/* Only add/insert a(i,j) with i<=j (blocks).
   Any a(i,j) with i>j input by the user is ignored or generates an error
*/
PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  MatScalar      value;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat          A     = baij->A;
  Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ*)(A)->data;
  PetscInt     *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar    *aa   =a->a;

  Mat         B      = baij->B;
  Mat_SeqBAIJ *b     = (Mat_SeqBAIJ*)(B)->data;
  PetscInt    *bimax =b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba    =b->a;

  PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar *ap,*bap;

  /* for stash */
  PetscInt  n_loc, *in_loc = NULL;
  MatScalar *v_loc = NULL;

  PetscFunctionBegin;
  if (!baij->donotstash) {
    if (n > baij->n_loc) {
      ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
      ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
      ierr = PetscMalloc1(n,&baij->in_loc);CHKERRQ(ierr);
      ierr = PetscMalloc1(n,&baij->v_loc);CHKERRQ(ierr);

      baij->n_loc = n;
    }
    in_loc = baij->in_loc;
    v_loc  = baij->v_loc;
  }

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
      row = im[i] - rstart_orig;                     /* local row index */
      for (j=0; j<n; j++) {
        if (im[i]/bs > in[j]/bs) {
          if (a->ignore_ltriangular) {
            continue; /* ignore lower triangular blocks */
          } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. 
Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 260 } 261 if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */ 262 col = in[j] - cstart_orig; /* local col index */ 263 brow = row/bs; bcol = col/bs; 264 if (brow > bcol) continue; /* ignore lower triangular blocks of A */ 265 if (roworiented) value = v[i*n+j]; 266 else value = v[i+j*m]; 267 MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,im[i],in[j]); 268 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 269 } else if (in[j] < 0) continue; 270 #if defined(PETSC_USE_DEBUG) 271 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 272 #endif 273 else { /* off-diag entry (B) */ 274 if (mat->was_assembled) { 275 if (!baij->colmap) { 276 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 277 } 278 #if defined(PETSC_USE_CTABLE) 279 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 280 col = col - 1; 281 #else 282 col = baij->colmap[in[j]/bs] - 1; 283 #endif 284 if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) { 285 ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 286 col = in[j]; 287 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 288 B = baij->B; 289 b = (Mat_SeqBAIJ*)(B)->data; 290 bimax= b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 291 ba = b->a; 292 } else col += in[j]%bs; 293 } else col = in[j]; 294 if (roworiented) value = v[i*n+j]; 295 else value = v[i+j*m]; 296 MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,im[i],in[j]); 297 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 298 } 299 } 300 } else { /* off processor entry */ 301 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 302 if (!baij->donotstash) { 303 mat->assembled = PETSC_FALSE; 304 n_loc = 0; 305 for (j=0; j<n; j++) { 306 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 307 in_loc[n_loc] = in[j]; 308 if (roworiented) { 309 v_loc[n_loc] = v[i*n+j]; 310 } else { 311 v_loc[n_loc] = v[j*m+i]; 312 } 313 n_loc++; 314 } 315 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr); 316 } 317 } 318 } 319 PetscFunctionReturn(0); 320 } 321 322 PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol) 323 { 324 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 325 PetscErrorCode ierr; 326 PetscInt *rp,low,high,t,ii,jj,nrow,i,rmax,N; 327 PetscInt *imax =a->imax,*ai=a->i,*ailen=a->ilen; 328 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs; 329 PetscBool roworiented=a->roworiented; 330 const PetscScalar *value = v; 331 MatScalar *ap,*aa = a->a,*bap; 332 333 PetscFunctionBegin; 334 if (col < row) { 335 if (a->ignore_ltriangular) PetscFunctionReturn(0); /* ignore lower triangular block */ 336 else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. 
Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 337 } 338 rp = aj + ai[row]; 339 ap = aa + bs2*ai[row]; 340 rmax = imax[row]; 341 nrow = ailen[row]; 342 value = v; 343 low = 0; 344 high = nrow; 345 346 while (high-low > 7) { 347 t = (low+high)/2; 348 if (rp[t] > col) high = t; 349 else low = t; 350 } 351 for (i=low; i<high; i++) { 352 if (rp[i] > col) break; 353 if (rp[i] == col) { 354 bap = ap + bs2*i; 355 if (roworiented) { 356 if (is == ADD_VALUES) { 357 for (ii=0; ii<bs; ii++) { 358 for (jj=ii; jj<bs2; jj+=bs) { 359 bap[jj] += *value++; 360 } 361 } 362 } else { 363 for (ii=0; ii<bs; ii++) { 364 for (jj=ii; jj<bs2; jj+=bs) { 365 bap[jj] = *value++; 366 } 367 } 368 } 369 } else { 370 if (is == ADD_VALUES) { 371 for (ii=0; ii<bs; ii++) { 372 for (jj=0; jj<bs; jj++) { 373 *bap++ += *value++; 374 } 375 } 376 } else { 377 for (ii=0; ii<bs; ii++) { 378 for (jj=0; jj<bs; jj++) { 379 *bap++ = *value++; 380 } 381 } 382 } 383 } 384 goto noinsert2; 385 } 386 } 387 if (nonew == 1) goto noinsert2; 388 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new block index nonzero block (%D, %D) in the matrix", orow, ocol); 389 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 390 N = nrow++ - 1; high++; 391 /* shift up all the later entries in this row */ 392 ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr); 393 ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr); 394 rp[i] = col; 395 bap = ap + bs2*i; 396 if (roworiented) { 397 for (ii=0; ii<bs; ii++) { 398 for (jj=ii; jj<bs2; jj+=bs) { 399 bap[jj] = *value++; 400 } 401 } 402 } else { 403 for (ii=0; ii<bs; ii++) { 404 for (jj=0; jj<bs; jj++) { 405 *bap++ = *value++; 406 } 407 } 408 } 409 noinsert2:; 410 ailen[row] = nrow; 411 PetscFunctionReturn(0); 412 } 413 414 /* 415 This routine is exactly duplicated in mpibaij.c 416 */ 417 PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol) 418 { 419 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 420 PetscInt *rp,low,high,t,ii,jj,nrow,i,rmax,N; 421 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 422 PetscErrorCode ierr; 423 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs; 424 PetscBool roworiented=a->roworiented; 425 const PetscScalar *value = v; 426 MatScalar *ap,*aa = a->a,*bap; 427 428 PetscFunctionBegin; 429 rp = aj + ai[row]; 430 ap = aa + bs2*ai[row]; 431 rmax = imax[row]; 432 nrow = ailen[row]; 433 low = 0; 434 high = nrow; 435 value = v; 436 while (high-low > 7) { 437 t = (low+high)/2; 438 if (rp[t] > col) high = t; 439 else low = t; 440 } 441 for (i=low; i<high; i++) { 442 if (rp[i] > col) break; 443 if (rp[i] == col) { 444 bap = ap + bs2*i; 445 if (roworiented) { 446 if (is == ADD_VALUES) { 447 for (ii=0; ii<bs; ii++) { 448 for (jj=ii; jj<bs2; jj+=bs) { 449 bap[jj] += *value++; 450 } 451 } 452 } else { 453 for (ii=0; ii<bs; ii++) { 454 for (jj=ii; jj<bs2; jj+=bs) { 455 bap[jj] = *value++; 456 } 457 } 458 } 459 } else { 460 if (is == ADD_VALUES) { 461 for (ii=0; ii<bs; ii++,value+=bs) { 462 for (jj=0; jj<bs; jj++) { 463 bap[jj] += value[jj]; 464 } 465 bap += bs; 466 } 467 } else { 468 for (ii=0; ii<bs; ii++,value+=bs) { 469 for (jj=0; jj<bs; jj++) { 470 bap[jj] = value[jj]; 471 } 472 bap += bs; 473 } 474 } 475 } 476 goto noinsert2; 477 } 478 } 479 if (nonew == 1) goto noinsert2; 480 if (nonew == -1) 
SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%D, %D) in the matrix", orow, ocol); 481 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 482 N = nrow++ - 1; high++; 483 /* shift up all the later entries in this row */ 484 ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr); 485 ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr); 486 rp[i] = col; 487 bap = ap + bs2*i; 488 if (roworiented) { 489 for (ii=0; ii<bs; ii++) { 490 for (jj=ii; jj<bs2; jj+=bs) { 491 bap[jj] = *value++; 492 } 493 } 494 } else { 495 for (ii=0; ii<bs; ii++) { 496 for (jj=0; jj<bs; jj++) { 497 *bap++ = *value++; 498 } 499 } 500 } 501 noinsert2:; 502 ailen[row] = nrow; 503 PetscFunctionReturn(0); 504 } 505 506 /* 507 This routine could be optimized by removing the need for the block copy below and passing stride information 508 to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ() 509 */ 510 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 511 { 512 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 513 const MatScalar *value; 514 MatScalar *barray =baij->barray; 515 PetscBool roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular; 516 PetscErrorCode ierr; 517 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 518 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 519 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 520 521 PetscFunctionBegin; 522 if (!barray) { 523 ierr = PetscMalloc1(bs2,&barray);CHKERRQ(ierr); 524 baij->barray = barray; 525 } 526 527 if (roworiented) { 528 stepval = (n-1)*bs; 529 } else { 530 stepval = (m-1)*bs; 531 } 532 for (i=0; i<m; i++) { 533 if (im[i] < 0) continue; 534 #if defined(PETSC_USE_DEBUG) 535 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %D max %D",im[i],baij->Mbs-1); 536 #endif 537 if (im[i] >= rstart && im[i] < rend) { 538 row = im[i] - rstart; 539 for (j=0; j<n; j++) { 540 if (im[i] > in[j]) { 541 if (ignore_ltriangular) continue; /* ignore lower triangular blocks */ 542 else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. 
Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 543 } 544 /* If NumCol = 1 then a copy is not required */ 545 if ((roworiented) && (n == 1)) { 546 barray = (MatScalar*) v + i*bs2; 547 } else if ((!roworiented) && (m == 1)) { 548 barray = (MatScalar*) v + j*bs2; 549 } else { /* Here a copy is required */ 550 if (roworiented) { 551 value = v + i*(stepval+bs)*bs + j*bs; 552 } else { 553 value = v + j*(stepval+bs)*bs + i*bs; 554 } 555 for (ii=0; ii<bs; ii++,value+=stepval) { 556 for (jj=0; jj<bs; jj++) { 557 *barray++ = *value++; 558 } 559 } 560 barray -=bs2; 561 } 562 563 if (in[j] >= cstart && in[j] < cend) { 564 col = in[j] - cstart; 565 ierr = MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr); 566 } else if (in[j] < 0) continue; 567 #if defined(PETSC_USE_DEBUG) 568 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %D max %D",in[j],baij->Nbs-1); 569 #endif 570 else { 571 if (mat->was_assembled) { 572 if (!baij->colmap) { 573 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 574 } 575 576 #if defined(PETSC_USE_DEBUG) 577 #if defined(PETSC_USE_CTABLE) 578 { PetscInt data; 579 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 580 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 581 } 582 #else 583 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 584 #endif 585 #endif 586 #if defined(PETSC_USE_CTABLE) 587 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 588 col = (col - 1)/bs; 589 #else 590 col = (baij->colmap[in[j]] - 1)/bs; 591 #endif 592 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 593 ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 594 col = in[j]; 595 } 596 } else col = in[j]; 597 ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr); 598 } 599 } 600 } else { 601 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 602 if (!baij->donotstash) { 603 if (roworiented) { 604 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 605 } else { 606 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 607 } 608 } 609 } 610 } 611 PetscFunctionReturn(0); 612 } 613 614 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 615 { 616 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 617 PetscErrorCode ierr; 618 PetscInt bs = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend; 619 PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data; 620 621 PetscFunctionBegin; 622 for (i=0; i<m; i++) { 623 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */ 624 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 625 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 626 row = idxm[i] - bsrstart; 627 for (j=0; j<n; j++) { 628 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */ 629 if (idxn[j] >= mat->cmap->N) 
SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          col  = idxn[j] - bscstart;
          ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscReal      sum[2],*lnorm2;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      ierr = PetscMalloc1(2,&lnorm2);CHKERRQ(ierr);
      ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of norm(A) */
      ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;            /* square of norm(B) */
      ierr = MPIU_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *norm = PetscSqrtReal(sum[0] + 2*sum[1]);
      ierr = PetscFree(lnorm2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
      Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
      Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
      PetscReal    *rsum,*rsum2,vabs;
      PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
      PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
      MatScalar    *v;

      ierr = PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2);CHKERRQ(ierr);
      ierr = PetscArrayzero(rsum,mat->cmap->N);CHKERRQ(ierr);
      /* Amat */
      v = amat->a; jj = amat->j;
      for (brow=0; brow<mbs; brow++) {
        grow = bs*(rstart + brow);
        nz   = amat->i[brow+1] - amat->i[brow];
        for (bcol=0; bcol<nz; bcol++) {
          gcol = bs*(rstart + *jj); jj++;
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              vabs            = PetscAbsScalar(*v); v++;
              rsum[gcol+col] += vabs;
              /* non-diagonal block */
              if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
            }
          }
        }
        ierr = PetscLogFlops(nz*bs*bs);CHKERRQ(ierr);
      }
      /* Bmat */
      v = bmat->a; jj = bmat->j;
      for (brow=0; brow<mbs; brow++) {
        grow = bs*(rstart + brow);
        nz   = bmat->i[brow+1] - bmat->i[brow];
        for (bcol=0; bcol<nz; bcol++) {
          gcol = bs*garray[*jj]; jj++;
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              vabs            = PetscAbsScalar(*v); v++;
              rsum[gcol+col] += vabs;
              rsum[grow+row] += vabs;
            }
          }
        }
        ierr = PetscLogFlops(nz*bs*bs);CHKERRQ(ierr);
      }
      ierr = MPIU_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *norm = 0.0;
      for (col=0; col<mat->cmap->N; col++) {
        if (rsum2[col] > *norm) *norm = rsum2[col];
      }
      ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr);
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;

  PetscFunctionBegin;
  if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
  Mat_SeqSBAIJ   *a   =(Mat_SeqSBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscMPIInt    n;
  PetscBool      r1,r2,r3;
  MatScalar      *val;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!baij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);

    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. 
*/ 819 /* 820 if nonzero structure of submatrix B cannot change then we know that 821 no processor disassembled thus we can skip this stuff 822 */ 823 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 824 ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 825 if (mat->was_assembled && !other_disassembled) { 826 ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 827 } 828 } 829 830 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 831 ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */ 832 } 833 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 834 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 835 836 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 837 838 baij->rowvalues = 0; 839 840 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 841 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 842 PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate; 843 ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 844 } 845 PetscFunctionReturn(0); 846 } 847 848 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 849 #include <petscdraw.h> 850 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 851 { 852 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 853 PetscErrorCode ierr; 854 PetscInt bs = mat->rmap->bs; 855 PetscMPIInt rank = baij->rank; 856 PetscBool iascii,isdraw; 857 PetscViewer sviewer; 858 PetscViewerFormat format; 859 860 PetscFunctionBegin; 861 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 862 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 863 if (iascii) { 864 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 865 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 866 MatInfo info; 867 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 868 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 869 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 870 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %g\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(double)info.memory);CHKERRQ(ierr); 871 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 872 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 873 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 874 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 875 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 876 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 877 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 878 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 879 PetscFunctionReturn(0); 880 } else if (format == PETSC_VIEWER_ASCII_INFO) { 881 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 882 PetscFunctionReturn(0); 883 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 884 PetscFunctionReturn(0); 885 } 886 
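    /* Sketch (not part of this routine): the ASCII info formats handled above are
       typically selected by the caller, e.g.
         PetscViewerPushFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO_DETAIL);
         MatView(mat,PETSC_VIEWER_STDOUT_WORLD);
         PetscViewerPopFormat(PETSC_VIEWER_STDOUT_WORLD);
       or with the command-line option -mat_view ::ascii_info_detail */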
} 887 888 if (isdraw) { 889 PetscDraw draw; 890 PetscBool isnull; 891 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 892 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 893 if (isnull) PetscFunctionReturn(0); 894 } 895 896 { 897 /* assemble the entire matrix onto first processor. */ 898 Mat A; 899 Mat_SeqSBAIJ *Aloc; 900 Mat_SeqBAIJ *Bloc; 901 PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 902 MatScalar *a; 903 const char *matname; 904 905 /* Should this be the same type as mat? */ 906 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 907 if (!rank) { 908 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 909 } else { 910 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 911 } 912 ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 913 ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr); 914 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 915 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 916 917 /* copy over the A part */ 918 Aloc = (Mat_SeqSBAIJ*)baij->A->data; 919 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 920 ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr); 921 922 for (i=0; i<mbs; i++) { 923 rvals[0] = bs*(baij->rstartbs + i); 924 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 925 for (j=ai[i]; j<ai[i+1]; j++) { 926 col = (baij->cstartbs+aj[j])*bs; 927 for (k=0; k<bs; k++) { 928 ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 929 col++; 930 a += bs; 931 } 932 } 933 } 934 /* copy over the B part */ 935 Bloc = (Mat_SeqBAIJ*)baij->B->data; 936 ai = Bloc->i; aj = Bloc->j; a = Bloc->a; 937 for (i=0; i<mbs; i++) { 938 939 rvals[0] = bs*(baij->rstartbs + i); 940 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 941 for (j=ai[i]; j<ai[i+1]; j++) { 942 col = baij->garray[aj[j]]*bs; 943 for (k=0; k<bs; k++) { 944 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 945 col++; 946 a += bs; 947 } 948 } 949 } 950 ierr = PetscFree(rvals);CHKERRQ(ierr); 951 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 952 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 953 /* 954 Everyone has to call to draw the matrix since the graphics waits are 955 synchronized across all processors that share the PetscDraw object 956 */ 957 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 958 ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr); 959 if (!rank) { 960 ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,matname);CHKERRQ(ierr); 961 ierr = MatView_SeqSBAIJ(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 962 } 963 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 964 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 965 ierr = MatDestroy(&A);CHKERRQ(ierr); 966 } 967 PetscFunctionReturn(0); 968 } 969 970 static PetscErrorCode MatView_MPISBAIJ_Binary(Mat mat,PetscViewer viewer) 971 { 972 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)mat->data; 973 Mat_SeqSBAIJ *A = (Mat_SeqSBAIJ*)a->A->data; 974 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)a->B->data; 975 PetscErrorCode ierr; 976 PetscInt i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen; 977 PetscInt *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll; 978 int fd; 979 PetscScalar *column_values; 980 FILE *file; 981 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 982 PetscInt 
message_count,flowcontrolcount; 983 984 PetscFunctionBegin; 985 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 986 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 987 nz = bs2*(A->nz + B->nz); 988 rlen = mat->rmap->n; 989 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 990 if (!rank) { 991 header[0] = MAT_FILE_CLASSID; 992 header[1] = mat->rmap->N; 993 header[2] = mat->cmap->N; 994 995 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 996 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 997 /* get largest number of rows any processor has */ 998 range = mat->rmap->range; 999 for (i=1; i<size; i++) { 1000 rlen = PetscMax(rlen,range[i+1] - range[i]); 1001 } 1002 } else { 1003 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1004 } 1005 1006 ierr = PetscMalloc1(rlen/bs,&crow_lens);CHKERRQ(ierr); 1007 /* compute lengths of each row */ 1008 for (i=0; i<a->mbs; i++) { 1009 crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1010 } 1011 /* store the row lengths to the file */ 1012 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1013 if (!rank) { 1014 MPI_Status status; 1015 ierr = PetscMalloc1(rlen,&row_lens);CHKERRQ(ierr); 1016 rlen = (range[1] - range[0])/bs; 1017 for (i=0; i<rlen; i++) { 1018 for (j=0; j<bs; j++) { 1019 row_lens[i*bs+j] = bs*crow_lens[i]; 1020 } 1021 } 1022 ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1023 for (i=1; i<size; i++) { 1024 rlen = (range[i+1] - range[i])/bs; 1025 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1026 ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1027 for (k=0; k<rlen; k++) { 1028 for (j=0; j<bs; j++) { 1029 row_lens[k*bs+j] = bs*crow_lens[k]; 1030 } 1031 } 1032 ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1033 } 1034 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1035 ierr = PetscFree(row_lens);CHKERRQ(ierr); 1036 } else { 1037 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1038 ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1039 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1040 } 1041 ierr = PetscFree(crow_lens);CHKERRQ(ierr); 1042 1043 /* load up the local column indices. Include for all rows not just one for each block row since process 0 does not have the 1044 information needed to make it for each row from a block row. 
This does require more communication but still not more than 1045 the communication needed for the nonzero values */ 1046 nzmax = nz; /* space a largest processor needs */ 1047 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1048 ierr = PetscMalloc1(nzmax,&column_indices);CHKERRQ(ierr); 1049 cnt = 0; 1050 for (i=0; i<a->mbs; i++) { 1051 pcnt = cnt; 1052 for (j=B->i[i]; j<B->i[i+1]; j++) { 1053 if ((col = garray[B->j[j]]) > cstart) break; 1054 for (l=0; l<bs; l++) { 1055 column_indices[cnt++] = bs*col+l; 1056 } 1057 } 1058 for (k=A->i[i]; k<A->i[i+1]; k++) { 1059 for (l=0; l<bs; l++) { 1060 column_indices[cnt++] = bs*(A->j[k] + cstart)+l; 1061 } 1062 } 1063 for (; j<B->i[i+1]; j++) { 1064 for (l=0; l<bs; l++) { 1065 column_indices[cnt++] = bs*garray[B->j[j]]+l; 1066 } 1067 } 1068 len = cnt - pcnt; 1069 for (k=1; k<bs; k++) { 1070 ierr = PetscArraycpy(&column_indices[cnt],&column_indices[pcnt],len);CHKERRQ(ierr); 1071 cnt += len; 1072 } 1073 } 1074 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1075 1076 /* store the columns to the file */ 1077 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1078 if (!rank) { 1079 MPI_Status status; 1080 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1081 for (i=1; i<size; i++) { 1082 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1083 ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1084 ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1085 ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1086 } 1087 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1088 } else { 1089 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1090 ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1091 ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1092 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1093 } 1094 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1095 1096 /* load up the numerical values */ 1097 ierr = PetscMalloc1(nzmax,&column_values);CHKERRQ(ierr); 1098 cnt = 0; 1099 for (i=0; i<a->mbs; i++) { 1100 rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]); 1101 for (j=B->i[i]; j<B->i[i+1]; j++) { 1102 if (garray[B->j[j]] > cstart) break; 1103 for (l=0; l<bs; l++) { 1104 for (ll=0; ll<bs; ll++) { 1105 column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll]; 1106 } 1107 } 1108 cnt += bs; 1109 } 1110 for (k=A->i[i]; k<A->i[i+1]; k++) { 1111 for (l=0; l<bs; l++) { 1112 for (ll=0; ll<bs; ll++) { 1113 column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll]; 1114 } 1115 } 1116 cnt += bs; 1117 } 1118 for (; j<B->i[i+1]; j++) { 1119 for (l=0; l<bs; l++) { 1120 for (ll=0; ll<bs; ll++) { 1121 column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll]; 1122 } 1123 } 1124 cnt += bs; 1125 } 1126 cnt += (bs-1)*rlen; 1127 } 1128 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1129 1130 /* store the column values to the file */ 1131 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1132 if (!rank) { 1133 MPI_Status status; 1134 ierr = 
PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1135 for (i=1; i<size; i++) { 1136 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1137 ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1138 ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1139 ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1140 } 1141 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1142 } else { 1143 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1144 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1145 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1146 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1147 } 1148 ierr = PetscFree(column_values);CHKERRQ(ierr); 1149 1150 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1151 if (file) { 1152 fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs); 1153 } 1154 PetscFunctionReturn(0); 1155 } 1156 1157 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer) 1158 { 1159 PetscErrorCode ierr; 1160 PetscBool iascii,isdraw,issocket,isbinary; 1161 1162 PetscFunctionBegin; 1163 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1164 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1165 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1166 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1167 if (iascii || isdraw || issocket) { 1168 ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1169 } else if (isbinary) { 1170 ierr = MatView_MPISBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1171 } 1172 PetscFunctionReturn(0); 1173 } 1174 1175 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat) 1176 { 1177 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1178 PetscErrorCode ierr; 1179 1180 PetscFunctionBegin; 1181 #if defined(PETSC_USE_LOG) 1182 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N); 1183 #endif 1184 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1185 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 1186 ierr = MatDestroy(&baij->A);CHKERRQ(ierr); 1187 ierr = MatDestroy(&baij->B);CHKERRQ(ierr); 1188 #if defined(PETSC_USE_CTABLE) 1189 ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr); 1190 #else 1191 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 1192 #endif 1193 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 1194 ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr); 1195 ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr); 1196 ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr); 1197 ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr); 1198 ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr); 1199 ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr); 1200 ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr); 1201 ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr); 1202 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 1203 ierr = PetscFree(baij->barray);CHKERRQ(ierr); 1204 ierr = PetscFree(baij->hd);CHKERRQ(ierr); 1205 ierr = VecDestroy(&baij->diag);CHKERRQ(ierr); 1206 ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr); 1207 ierr = 
VecDestroy(&baij->xx1);CHKERRQ(ierr); 1208 #if defined(PETSC_USE_REAL_MAT_SINGLE) 1209 ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr); 1210 #endif 1211 ierr = PetscFree(baij->in_loc);CHKERRQ(ierr); 1212 ierr = PetscFree(baij->v_loc);CHKERRQ(ierr); 1213 ierr = PetscFree(baij->rangebs);CHKERRQ(ierr); 1214 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1215 1216 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1217 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1218 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1219 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1220 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1221 #if defined(PETSC_HAVE_ELEMENTAL) 1222 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_elemental_C",NULL);CHKERRQ(ierr); 1223 #endif 1224 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpiaij_C",NULL);CHKERRQ(ierr); 1225 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpibaij_C",NULL);CHKERRQ(ierr); 1226 PetscFunctionReturn(0); 1227 } 1228 1229 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy) 1230 { 1231 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1232 PetscErrorCode ierr; 1233 PetscInt nt,mbs=a->mbs,bs=A->rmap->bs; 1234 PetscScalar *from; 1235 const PetscScalar *x; 1236 1237 PetscFunctionBegin; 1238 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1239 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1240 1241 /* diagonal part */ 1242 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 1243 ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr); 1244 1245 /* subdiagonal part */ 1246 ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 1247 1248 /* copy x into the vec slvec0 */ 1249 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 1250 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1251 1252 ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr); 1253 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 1254 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1255 1256 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1257 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1258 /* supperdiagonal part */ 1259 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 1260 PetscFunctionReturn(0); 1261 } 1262 1263 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy) 1264 { 1265 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1266 PetscErrorCode ierr; 1267 PetscInt nt,mbs=a->mbs,bs=A->rmap->bs; 1268 PetscScalar *from; 1269 const PetscScalar *x; 1270 1271 PetscFunctionBegin; 1272 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1273 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1274 1275 /* diagonal part */ 1276 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 1277 ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr); 1278 1279 /* subdiagonal part */ 1280 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 1281 1282 /* copy x into the vec slvec0 */ 1283 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 1284 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1285 1286 ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr); 1287 ierr = 
VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 1288 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1289 1290 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1291 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1292 /* supperdiagonal part */ 1293 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 1294 PetscFunctionReturn(0); 1295 } 1296 1297 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy) 1298 { 1299 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1300 PetscErrorCode ierr; 1301 PetscInt nt; 1302 1303 PetscFunctionBegin; 1304 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1305 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1306 1307 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1308 if (nt != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 1309 1310 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1311 /* do diagonal part */ 1312 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1313 /* do supperdiagonal part */ 1314 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1315 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1316 /* do subdiagonal part */ 1317 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1318 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1319 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1320 PetscFunctionReturn(0); 1321 } 1322 1323 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1324 { 1325 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1326 PetscErrorCode ierr; 1327 PetscInt mbs=a->mbs,bs=A->rmap->bs; 1328 PetscScalar *from,zero=0.0; 1329 const PetscScalar *x; 1330 1331 PetscFunctionBegin; 1332 /* 1333 PetscSynchronizedPrintf(PetscObjectComm((PetscObject)A)," MatMultAdd is called ...\n"); 1334 PetscSynchronizedFlush(PetscObjectComm((PetscObject)A),PETSC_STDOUT); 1335 */ 1336 /* diagonal part */ 1337 ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr); 1338 ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr); 1339 1340 /* subdiagonal part */ 1341 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 1342 1343 /* copy x into the vec slvec0 */ 1344 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 1345 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1346 ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr); 1347 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 1348 1349 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1350 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1351 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1352 1353 /* supperdiagonal part */ 1354 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr); 1355 PetscFunctionReturn(0); 1356 } 1357 1358 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz) 1359 { 1360 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1361 PetscErrorCode ierr; 1362 1363 PetscFunctionBegin; 1364 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1365 /* do diagonal part */ 1366 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1367 /* do supperdiagonal part */ 1368 ierr = 
VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1369 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1370 1371 /* do subdiagonal part */ 1372 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1373 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1374 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1375 PetscFunctionReturn(0); 1376 } 1377 1378 /* 1379 This only works correctly for square matrices where the subblock A->A is the 1380 diagonal block 1381 */ 1382 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v) 1383 { 1384 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1385 PetscErrorCode ierr; 1386 1387 PetscFunctionBegin; 1388 /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */ 1389 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1390 PetscFunctionReturn(0); 1391 } 1392 1393 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa) 1394 { 1395 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1396 PetscErrorCode ierr; 1397 1398 PetscFunctionBegin; 1399 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1400 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1401 PetscFunctionReturn(0); 1402 } 1403 1404 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1405 { 1406 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 1407 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1408 PetscErrorCode ierr; 1409 PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1410 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend; 1411 PetscInt *cmap,*idx_p,cstart = mat->rstartbs; 1412 1413 PetscFunctionBegin; 1414 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1415 mat->getrowactive = PETSC_TRUE; 1416 1417 if (!mat->rowvalues && (idx || v)) { 1418 /* 1419 allocate enough space to hold information from the longest row. 
1420 */ 1421 Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data; 1422 Mat_SeqBAIJ *Ba = (Mat_SeqBAIJ*)mat->B->data; 1423 PetscInt max = 1,mbs = mat->mbs,tmp; 1424 for (i=0; i<mbs; i++) { 1425 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */ 1426 if (max < tmp) max = tmp; 1427 } 1428 ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr); 1429 } 1430 1431 if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows"); 1432 lrow = row - brstart; /* local row index */ 1433 1434 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1435 if (!v) {pvA = 0; pvB = 0;} 1436 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1437 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1438 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1439 nztot = nzA + nzB; 1440 1441 cmap = mat->garray; 1442 if (v || idx) { 1443 if (nztot) { 1444 /* Sort by increasing column numbers, assuming A and B already sorted */ 1445 PetscInt imark = -1; 1446 if (v) { 1447 *v = v_p = mat->rowvalues; 1448 for (i=0; i<nzB; i++) { 1449 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1450 else break; 1451 } 1452 imark = i; 1453 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1454 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1455 } 1456 if (idx) { 1457 *idx = idx_p = mat->rowindices; 1458 if (imark > -1) { 1459 for (i=0; i<imark; i++) { 1460 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1461 } 1462 } else { 1463 for (i=0; i<nzB; i++) { 1464 if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1465 else break; 1466 } 1467 imark = i; 1468 } 1469 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1470 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1471 } 1472 } else { 1473 if (idx) *idx = 0; 1474 if (v) *v = 0; 1475 } 1476 } 1477 *nz = nztot; 1478 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1479 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1480 PetscFunctionReturn(0); 1481 } 1482 1483 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1484 { 1485 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1486 1487 PetscFunctionBegin; 1488 if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1489 baij->getrowactive = PETSC_FALSE; 1490 PetscFunctionReturn(0); 1491 } 1492 1493 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A) 1494 { 1495 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1496 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1497 1498 PetscFunctionBegin; 1499 aA->getrow_utriangular = PETSC_TRUE; 1500 PetscFunctionReturn(0); 1501 } 1502 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A) 1503 { 1504 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1505 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1506 1507 PetscFunctionBegin; 1508 aA->getrow_utriangular = PETSC_FALSE; 1509 PetscFunctionReturn(0); 1510 } 1511 1512 PetscErrorCode MatRealPart_MPISBAIJ(Mat A) 1513 { 1514 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1515 PetscErrorCode ierr; 1516 1517 PetscFunctionBegin; 1518 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1519 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1520 PetscFunctionReturn(0); 1521 } 1522 1523 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A) 1524 { 1525 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1526 PetscErrorCode ierr; 1527 1528 PetscFunctionBegin; 1529 ierr = 
MatImaginaryPart(a->A);CHKERRQ(ierr); 1530 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1531 PetscFunctionReturn(0); 1532 } 1533 1534 /* Check if isrow is a subset of iscol_local, called by MatCreateSubMatrix_MPISBAIJ() 1535 Input: isrow - distributed(parallel), 1536 iscol_local - locally owned (seq) 1537 */ 1538 PetscErrorCode ISEqual_private(IS isrow,IS iscol_local,PetscBool *flg) 1539 { 1540 PetscErrorCode ierr; 1541 PetscInt sz1,sz2,*a1,*a2,i,j,k,nmatch; 1542 const PetscInt *ptr1,*ptr2; 1543 1544 PetscFunctionBegin; 1545 ierr = ISGetLocalSize(isrow,&sz1);CHKERRQ(ierr); 1546 ierr = ISGetLocalSize(iscol_local,&sz2);CHKERRQ(ierr); 1547 if (sz1 > sz2) { 1548 *flg = PETSC_FALSE; 1549 PetscFunctionReturn(0); 1550 } 1551 1552 ierr = ISGetIndices(isrow,&ptr1);CHKERRQ(ierr); 1553 ierr = ISGetIndices(iscol_local,&ptr2);CHKERRQ(ierr); 1554 1555 ierr = PetscMalloc1(sz1,&a1);CHKERRQ(ierr); 1556 ierr = PetscMalloc1(sz2,&a2);CHKERRQ(ierr); 1557 ierr = PetscArraycpy(a1,ptr1,sz1);CHKERRQ(ierr); 1558 ierr = PetscArraycpy(a2,ptr2,sz2);CHKERRQ(ierr); 1559 ierr = PetscSortInt(sz1,a1);CHKERRQ(ierr); 1560 ierr = PetscSortInt(sz2,a2);CHKERRQ(ierr); 1561 1562 nmatch=0; 1563 k = 0; 1564 for (i=0; i<sz1; i++){ 1565 for (j=k; j<sz2; j++){ 1566 if (a1[i] == a2[j]) { 1567 k = j; nmatch++; 1568 break; 1569 } 1570 } 1571 } 1572 ierr = ISRestoreIndices(isrow,&ptr1);CHKERRQ(ierr); 1573 ierr = ISRestoreIndices(iscol_local,&ptr2);CHKERRQ(ierr); 1574 ierr = PetscFree(a1);CHKERRQ(ierr); 1575 ierr = PetscFree(a2);CHKERRQ(ierr); 1576 if (nmatch < sz1) { 1577 *flg = PETSC_FALSE; 1578 } else { 1579 *flg = PETSC_TRUE; 1580 } 1581 PetscFunctionReturn(0); 1582 } 1583 1584 PetscErrorCode MatCreateSubMatrix_MPISBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 1585 { 1586 PetscErrorCode ierr; 1587 IS iscol_local; 1588 PetscInt csize; 1589 PetscBool isequal; 1590 1591 PetscFunctionBegin; 1592 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1593 if (call == MAT_REUSE_MATRIX) { 1594 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 1595 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 1596 } else { 1597 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 1598 ierr = ISEqual_private(isrow,iscol_local,&isequal);CHKERRQ(ierr); 1599 if (!isequal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isrow"); 1600 } 1601 1602 /* now call MatCreateSubMatrix_MPIBAIJ() */ 1603 ierr = MatCreateSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 1604 if (call == MAT_INITIAL_MATRIX) { 1605 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 1606 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 1607 } 1608 PetscFunctionReturn(0); 1609 } 1610 1611 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A) 1612 { 1613 Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data; 1614 PetscErrorCode ierr; 1615 1616 PetscFunctionBegin; 1617 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1618 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1619 PetscFunctionReturn(0); 1620 } 1621 1622 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1623 { 1624 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data; 1625 Mat A = a->A,B = a->B; 1626 PetscErrorCode ierr; 1627 PetscReal isend[5],irecv[5]; 1628 1629 PetscFunctionBegin; 1630 info->block_size = (PetscReal)matin->rmap->bs; 1631 1632 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1633 
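  /* info now holds the diagonal block's statistics; they are copied into isend[] below,
     the off-diagonal block's statistics are added, and for MAT_GLOBAL_MAX/MAT_GLOBAL_SUM
     the per-process totals are reduced across the communicator */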
1634 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1635 isend[3] = info->memory; isend[4] = info->mallocs; 1636 1637 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1638 1639 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1640 isend[3] += info->memory; isend[4] += info->mallocs; 1641 if (flag == MAT_LOCAL) { 1642 info->nz_used = isend[0]; 1643 info->nz_allocated = isend[1]; 1644 info->nz_unneeded = isend[2]; 1645 info->memory = isend[3]; 1646 info->mallocs = isend[4]; 1647 } else if (flag == MAT_GLOBAL_MAX) { 1648 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1649 1650 info->nz_used = irecv[0]; 1651 info->nz_allocated = irecv[1]; 1652 info->nz_unneeded = irecv[2]; 1653 info->memory = irecv[3]; 1654 info->mallocs = irecv[4]; 1655 } else if (flag == MAT_GLOBAL_SUM) { 1656 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1657 1658 info->nz_used = irecv[0]; 1659 info->nz_allocated = irecv[1]; 1660 info->nz_unneeded = irecv[2]; 1661 info->memory = irecv[3]; 1662 info->mallocs = irecv[4]; 1663 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1664 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1665 info->fill_ratio_needed = 0; 1666 info->factor_mallocs = 0; 1667 PetscFunctionReturn(0); 1668 } 1669 1670 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg) 1671 { 1672 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1673 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1674 PetscErrorCode ierr; 1675 1676 PetscFunctionBegin; 1677 switch (op) { 1678 case MAT_NEW_NONZERO_LOCATIONS: 1679 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1680 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1681 case MAT_KEEP_NONZERO_PATTERN: 1682 case MAT_SUBMAT_SINGLEIS: 1683 case MAT_NEW_NONZERO_LOCATION_ERR: 1684 MatCheckPreallocated(A,1); 1685 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1686 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1687 break; 1688 case MAT_ROW_ORIENTED: 1689 MatCheckPreallocated(A,1); 1690 a->roworiented = flg; 1691 1692 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1693 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1694 break; 1695 case MAT_NEW_DIAGONALS: 1696 case MAT_SORTED_FULL: 1697 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1698 break; 1699 case MAT_IGNORE_OFF_PROC_ENTRIES: 1700 a->donotstash = flg; 1701 break; 1702 case MAT_USE_HASH_TABLE: 1703 a->ht_flag = flg; 1704 break; 1705 case MAT_HERMITIAN: 1706 MatCheckPreallocated(A,1); 1707 if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first"); 1708 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1709 #if defined(PETSC_USE_COMPLEX) 1710 A->ops->mult = MatMult_MPISBAIJ_Hermitian; 1711 #endif 1712 break; 1713 case MAT_SPD: 1714 A->spd_set = PETSC_TRUE; 1715 A->spd = flg; 1716 if (flg) { 1717 A->symmetric = PETSC_TRUE; 1718 A->structurally_symmetric = PETSC_TRUE; 1719 A->symmetric_set = PETSC_TRUE; 1720 A->structurally_symmetric_set = PETSC_TRUE; 1721 } 1722 break; 1723 case MAT_SYMMETRIC: 1724 MatCheckPreallocated(A,1); 1725 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1726 break; 1727 case MAT_STRUCTURALLY_SYMMETRIC: 1728 MatCheckPreallocated(A,1); 1729 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1730 break; 1731 case MAT_SYMMETRY_ETERNAL: 1732 if (!flg) 
SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric"); 1733 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1734 break; 1735 case MAT_IGNORE_LOWER_TRIANGULAR: 1736 aA->ignore_ltriangular = flg; 1737 break; 1738 case MAT_ERROR_LOWER_TRIANGULAR: 1739 aA->ignore_ltriangular = flg; 1740 break; 1741 case MAT_GETROW_UPPERTRIANGULAR: 1742 aA->getrow_utriangular = flg; 1743 break; 1744 default: 1745 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1746 } 1747 PetscFunctionReturn(0); 1748 } 1749 1750 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B) 1751 { 1752 PetscErrorCode ierr; 1753 1754 PetscFunctionBegin; 1755 if (reuse == MAT_INITIAL_MATRIX) { 1756 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 1757 } else if (reuse == MAT_REUSE_MATRIX) { 1758 ierr = MatCopy(A,*B,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1759 } 1760 PetscFunctionReturn(0); 1761 } 1762 1763 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr) 1764 { 1765 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1766 Mat a = baij->A, b=baij->B; 1767 PetscErrorCode ierr; 1768 PetscInt nv,m,n; 1769 PetscBool flg; 1770 1771 PetscFunctionBegin; 1772 if (ll != rr) { 1773 ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr); 1774 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n"); 1775 } 1776 if (!ll) PetscFunctionReturn(0); 1777 1778 ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr); 1779 if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n); 1780 1781 ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr); 1782 if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size"); 1783 1784 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1785 1786 /* left diagonalscale the off-diagonal part */ 1787 ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr); 1788 1789 /* scale the diagonal part */ 1790 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1791 1792 /* right diagonalscale the off-diagonal part */ 1793 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1794 ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr); 1795 PetscFunctionReturn(0); 1796 } 1797 1798 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A) 1799 { 1800 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1801 PetscErrorCode ierr; 1802 1803 PetscFunctionBegin; 1804 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1805 PetscFunctionReturn(0); 1806 } 1807 1808 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*); 1809 1810 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool *flag) 1811 { 1812 Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data; 1813 Mat a,b,c,d; 1814 PetscBool flg; 1815 PetscErrorCode ierr; 1816 1817 PetscFunctionBegin; 1818 a = matA->A; b = matA->B; 1819 c = matB->A; d = matB->B; 1820 1821 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1822 if (flg) { 1823 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1824 } 1825 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1826 PetscFunctionReturn(0); 1827 } 1828 1829 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str) 1830 { 1831 PetscErrorCode ierr; 1832 PetscBool isbaij; 1833 1834 PetscFunctionBegin; 1835 ierr = PetscObjectTypeCompareAny((PetscObject)B,&isbaij,MATSEQSBAIJ,MATMPISBAIJ,"");CHKERRQ(ierr); 
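  /* the fast path below copies the A (diagonal) and B (off-diagonal) blocks directly when the
     nonzero patterns and copy implementations match; otherwise MatCopy_Basic() traverses full
     rows, which requires MatGetRowUpperTriangular() on the source matrix */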
1836 if (!isbaij) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"Not for matrix type %s",((PetscObject)B)->type_name); 1837 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1838 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1839 ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr); 1840 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1841 ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr); 1842 } else { 1843 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1844 Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)B->data; 1845 1846 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1847 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1848 } 1849 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 1850 PetscFunctionReturn(0); 1851 } 1852 1853 PetscErrorCode MatSetUp_MPISBAIJ(Mat A) 1854 { 1855 PetscErrorCode ierr; 1856 1857 PetscFunctionBegin; 1858 ierr = MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1859 PetscFunctionReturn(0); 1860 } 1861 1862 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1863 { 1864 PetscErrorCode ierr; 1865 Mat_MPISBAIJ *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data; 1866 PetscBLASInt bnz,one=1; 1867 Mat_SeqSBAIJ *xa,*ya; 1868 Mat_SeqBAIJ *xb,*yb; 1869 1870 PetscFunctionBegin; 1871 if (str == SAME_NONZERO_PATTERN) { 1872 PetscScalar alpha = a; 1873 xa = (Mat_SeqSBAIJ*)xx->A->data; 1874 ya = (Mat_SeqSBAIJ*)yy->A->data; 1875 ierr = PetscBLASIntCast(xa->nz,&bnz);CHKERRQ(ierr); 1876 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one)); 1877 xb = (Mat_SeqBAIJ*)xx->B->data; 1878 yb = (Mat_SeqBAIJ*)yy->B->data; 1879 ierr = PetscBLASIntCast(xb->nz,&bnz);CHKERRQ(ierr); 1880 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one)); 1881 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 1882 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1883 ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 1884 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1885 ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr); 1886 } else { 1887 Mat B; 1888 PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs; 1889 if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size"); 1890 ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr); 1891 ierr = MatGetRowUpperTriangular(Y);CHKERRQ(ierr); 1892 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 1893 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 1894 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 1895 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 1896 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 1897 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 1898 ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr); 1899 ierr = MatAXPYGetPreallocation_SeqSBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 1900 ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 1901 ierr = MatMPISBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 1902 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 1903 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 1904 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 1905 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 1906 ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr); 1907 ierr = 
MatRestoreRowUpperTriangular(Y);CHKERRQ(ierr); 1908 } 1909 PetscFunctionReturn(0); 1910 } 1911 1912 PetscErrorCode MatCreateSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[]) 1913 { 1914 PetscErrorCode ierr; 1915 PetscInt i; 1916 PetscBool flg; 1917 1918 PetscFunctionBegin; 1919 ierr = MatCreateSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr); /* B[] are sbaij matrices */ 1920 for (i=0; i<n; i++) { 1921 ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr); 1922 if (!flg) { 1923 ierr = MatSeqSBAIJZeroOps_Private(*B[i]);CHKERRQ(ierr); 1924 } 1925 } 1926 PetscFunctionReturn(0); 1927 } 1928 1929 PetscErrorCode MatShift_MPISBAIJ(Mat Y,PetscScalar a) 1930 { 1931 PetscErrorCode ierr; 1932 Mat_MPISBAIJ *maij = (Mat_MPISBAIJ*)Y->data; 1933 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ*)maij->A->data; 1934 1935 PetscFunctionBegin; 1936 if (!Y->preallocated) { 1937 ierr = MatMPISBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);CHKERRQ(ierr); 1938 } else if (!aij->nz) { 1939 PetscInt nonew = aij->nonew; 1940 ierr = MatSeqSBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL);CHKERRQ(ierr); 1941 aij->nonew = nonew; 1942 } 1943 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 1944 PetscFunctionReturn(0); 1945 } 1946 1947 PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A,PetscBool *missing,PetscInt *d) 1948 { 1949 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1950 PetscErrorCode ierr; 1951 1952 PetscFunctionBegin; 1953 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 1954 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 1955 if (d) { 1956 PetscInt rstart; 1957 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 1958 *d += rstart/A->rmap->bs; 1959 1960 } 1961 PetscFunctionReturn(0); 1962 } 1963 1964 PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a) 1965 { 1966 PetscFunctionBegin; 1967 *a = ((Mat_MPISBAIJ*)A->data)->A; 1968 PetscFunctionReturn(0); 1969 } 1970 1971 /* -------------------------------------------------------------------*/ 1972 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ, 1973 MatGetRow_MPISBAIJ, 1974 MatRestoreRow_MPISBAIJ, 1975 MatMult_MPISBAIJ, 1976 /* 4*/ MatMultAdd_MPISBAIJ, 1977 MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */ 1978 MatMultAdd_MPISBAIJ, 1979 0, 1980 0, 1981 0, 1982 /* 10*/ 0, 1983 0, 1984 0, 1985 MatSOR_MPISBAIJ, 1986 MatTranspose_MPISBAIJ, 1987 /* 15*/ MatGetInfo_MPISBAIJ, 1988 MatEqual_MPISBAIJ, 1989 MatGetDiagonal_MPISBAIJ, 1990 MatDiagonalScale_MPISBAIJ, 1991 MatNorm_MPISBAIJ, 1992 /* 20*/ MatAssemblyBegin_MPISBAIJ, 1993 MatAssemblyEnd_MPISBAIJ, 1994 MatSetOption_MPISBAIJ, 1995 MatZeroEntries_MPISBAIJ, 1996 /* 24*/ 0, 1997 0, 1998 0, 1999 0, 2000 0, 2001 /* 29*/ MatSetUp_MPISBAIJ, 2002 0, 2003 0, 2004 MatGetDiagonalBlock_MPISBAIJ, 2005 0, 2006 /* 34*/ MatDuplicate_MPISBAIJ, 2007 0, 2008 0, 2009 0, 2010 0, 2011 /* 39*/ MatAXPY_MPISBAIJ, 2012 MatCreateSubMatrices_MPISBAIJ, 2013 MatIncreaseOverlap_MPISBAIJ, 2014 MatGetValues_MPISBAIJ, 2015 MatCopy_MPISBAIJ, 2016 /* 44*/ 0, 2017 MatScale_MPISBAIJ, 2018 MatShift_MPISBAIJ, 2019 0, 2020 0, 2021 /* 49*/ 0, 2022 0, 2023 0, 2024 0, 2025 0, 2026 /* 54*/ 0, 2027 0, 2028 MatSetUnfactored_MPISBAIJ, 2029 0, 2030 MatSetValuesBlocked_MPISBAIJ, 2031 /* 59*/ MatCreateSubMatrix_MPISBAIJ, 2032 0, 2033 0, 2034 0, 2035 0, 2036 /* 64*/ 0, 2037 0, 2038 0, 2039 0, 2040 0, 2041 /* 69*/ MatGetRowMaxAbs_MPISBAIJ, 2042 0, 2043 0, 2044 0, 2045 0, 2046 /* 74*/ 0, 2047 0, 2048 0, 2049 0, 2050 0, 2051 /* 79*/ 0, 2052 
0, 2053 0, 2054 0, 2055 MatLoad_MPISBAIJ, 2056 /* 84*/ 0, 2057 0, 2058 0, 2059 0, 2060 0, 2061 /* 89*/ 0, 2062 0, 2063 0, 2064 0, 2065 0, 2066 /* 94*/ 0, 2067 0, 2068 0, 2069 0, 2070 0, 2071 /* 99*/ 0, 2072 0, 2073 0, 2074 0, 2075 0, 2076 /*104*/ 0, 2077 MatRealPart_MPISBAIJ, 2078 MatImaginaryPart_MPISBAIJ, 2079 MatGetRowUpperTriangular_MPISBAIJ, 2080 MatRestoreRowUpperTriangular_MPISBAIJ, 2081 /*109*/ 0, 2082 0, 2083 0, 2084 0, 2085 MatMissingDiagonal_MPISBAIJ, 2086 /*114*/ 0, 2087 0, 2088 0, 2089 0, 2090 0, 2091 /*119*/ 0, 2092 0, 2093 0, 2094 0, 2095 0, 2096 /*124*/ 0, 2097 0, 2098 0, 2099 0, 2100 0, 2101 /*129*/ 0, 2102 0, 2103 0, 2104 0, 2105 0, 2106 /*134*/ 0, 2107 0, 2108 0, 2109 0, 2110 0, 2111 /*139*/ MatSetBlockSizes_Default, 2112 0, 2113 0, 2114 0, 2115 0, 2116 /*144*/MatCreateMPIMatConcatenateSeqMat_MPISBAIJ 2117 }; 2118 2119 PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz) 2120 { 2121 Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)B->data; 2122 PetscErrorCode ierr; 2123 PetscInt i,mbs,Mbs; 2124 PetscMPIInt size; 2125 2126 PetscFunctionBegin; 2127 ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 2128 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2129 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2130 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2131 if (B->rmap->N > B->cmap->N) SETERRQ2(PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"MPISBAIJ matrix cannot have more rows %D than columns %D",B->rmap->N,B->cmap->N); 2132 if (B->rmap->n > B->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"MPISBAIJ matrix cannot have more local rows %D than columns %D",B->rmap->n,B->cmap->n); 2133 2134 mbs = B->rmap->n/bs; 2135 Mbs = B->rmap->N/bs; 2136 if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->N,bs); 2137 2138 B->rmap->bs = bs; 2139 b->bs2 = bs*bs; 2140 b->mbs = mbs; 2141 b->Mbs = Mbs; 2142 b->nbs = B->cmap->n/bs; 2143 b->Nbs = B->cmap->N/bs; 2144 2145 for (i=0; i<=b->size; i++) { 2146 b->rangebs[i] = B->rmap->range[i]/bs; 2147 } 2148 b->rstartbs = B->rmap->rstart/bs; 2149 b->rendbs = B->rmap->rend/bs; 2150 2151 b->cstartbs = B->cmap->rstart/bs; 2152 b->cendbs = B->cmap->rend/bs; 2153 2154 #if defined(PETSC_USE_CTABLE) 2155 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2156 #else 2157 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2158 #endif 2159 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2160 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2161 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2162 ierr = VecDestroy(&b->slvec0);CHKERRQ(ierr); 2163 ierr = VecDestroy(&b->slvec0b);CHKERRQ(ierr); 2164 ierr = VecDestroy(&b->slvec1);CHKERRQ(ierr); 2165 ierr = VecDestroy(&b->slvec1a);CHKERRQ(ierr); 2166 ierr = VecDestroy(&b->slvec1b);CHKERRQ(ierr); 2167 ierr = VecScatterDestroy(&b->sMvctx);CHKERRQ(ierr); 2168 2169 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2170 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 2171 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2172 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2173 ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? 
B->cmap->N : 0);CHKERRQ(ierr); 2174 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 2175 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2176 2177 if (!B->preallocated) { 2178 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2179 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2180 ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr); 2181 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2182 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr); 2183 } 2184 2185 ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2186 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 2187 2188 B->preallocated = PETSC_TRUE; 2189 B->was_assembled = PETSC_FALSE; 2190 B->assembled = PETSC_FALSE; 2191 PetscFunctionReturn(0); 2192 } 2193 2194 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2195 { 2196 PetscInt m,rstart,cend; 2197 PetscInt i,j,d,nz,bd, nz_max=0,*d_nnz=0,*o_nnz=0; 2198 const PetscInt *JJ =0; 2199 PetscScalar *values=0; 2200 PetscBool roworiented = ((Mat_MPISBAIJ*)B->data)->roworiented; 2201 PetscErrorCode ierr; 2202 PetscBool nooffprocentries; 2203 2204 PetscFunctionBegin; 2205 if (bs < 1) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 2206 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2207 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2208 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2209 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2210 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2211 m = B->rmap->n/bs; 2212 rstart = B->rmap->rstart/bs; 2213 cend = B->cmap->rend/bs; 2214 2215 if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 2216 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 2217 for (i=0; i<m; i++) { 2218 nz = ii[i+1] - ii[i]; 2219 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 2220 /* count the ones on the diagonal and above, split into diagonal and off diagonal portions. */ 2221 JJ = jj + ii[i]; 2222 bd = 0; 2223 for (j=0; j<nz; j++) { 2224 if (*JJ >= i + rstart) break; 2225 JJ++; 2226 bd++; 2227 } 2228 d = 0; 2229 for (; j<nz; j++) { 2230 if (*JJ++ >= cend) break; 2231 d++; 2232 } 2233 d_nnz[i] = d; 2234 o_nnz[i] = nz - d - bd; 2235 nz = nz - bd; 2236 nz_max = PetscMax(nz_max,nz); 2237 } 2238 ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2239 ierr = MatSetOption(B,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 2240 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 2241 2242 values = (PetscScalar*)V; 2243 if (!values) { 2244 ierr = PetscCalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr); 2245 } 2246 for (i=0; i<m; i++) { 2247 PetscInt row = i + rstart; 2248 PetscInt ncols = ii[i+1] - ii[i]; 2249 const PetscInt *icols = jj + ii[i]; 2250 if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */ 2251 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2252 ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2253 } else { /* block ordering does not match so we can only insert one block at a time. 
*/ 2254 PetscInt j; 2255 for (j=0; j<ncols; j++) { 2256 const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0); 2257 ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr); 2258 } 2259 } 2260 } 2261 2262 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2263 nooffprocentries = B->nooffprocentries; 2264 B->nooffprocentries = PETSC_TRUE; 2265 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2266 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2267 B->nooffprocentries = nooffprocentries; 2268 2269 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2270 PetscFunctionReturn(0); 2271 } 2272 2273 /*MC 2274 MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices, 2275 based on block compressed sparse row format. Only the upper triangular portion of the "diagonal" portion of 2276 the matrix is stored. 2277 2278 For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you 2279 can call MatSetOption(Mat, MAT_HERMITIAN); 2280 2281 Options Database Keys: 2282 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions() 2283 2284 Notes: 2285 The number of rows in the matrix must be less than or equal to the number of columns. Similarly the number of rows in the 2286 diagonal portion of the matrix of each process has to less than or equal the number of columns. 2287 2288 Level: beginner 2289 2290 .seealso: MatCreateMPISBAIJ(), MATSEQSBAIJ, MatType 2291 M*/ 2292 2293 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B) 2294 { 2295 Mat_MPISBAIJ *b; 2296 PetscErrorCode ierr; 2297 PetscBool flg = PETSC_FALSE; 2298 2299 PetscFunctionBegin; 2300 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 2301 B->data = (void*)b; 2302 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2303 2304 B->ops->destroy = MatDestroy_MPISBAIJ; 2305 B->ops->view = MatView_MPISBAIJ; 2306 B->assembled = PETSC_FALSE; 2307 B->insertmode = NOT_SET_VALUES; 2308 2309 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 2310 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr); 2311 2312 /* build local table of row and column ownerships */ 2313 ierr = PetscMalloc1(b->size+2,&b->rangebs);CHKERRQ(ierr); 2314 2315 /* build cache for off array entries formed */ 2316 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 2317 2318 b->donotstash = PETSC_FALSE; 2319 b->colmap = NULL; 2320 b->garray = NULL; 2321 b->roworiented = PETSC_TRUE; 2322 2323 /* stuff used in block assembly */ 2324 b->barray = 0; 2325 2326 /* stuff used for matrix vector multiply */ 2327 b->lvec = 0; 2328 b->Mvctx = 0; 2329 b->slvec0 = 0; 2330 b->slvec0b = 0; 2331 b->slvec1 = 0; 2332 b->slvec1a = 0; 2333 b->slvec1b = 0; 2334 b->sMvctx = 0; 2335 2336 /* stuff for MatGetRow() */ 2337 b->rowindices = 0; 2338 b->rowvalues = 0; 2339 b->getrowactive = PETSC_FALSE; 2340 2341 /* hash table stuff */ 2342 b->ht = 0; 2343 b->hd = 0; 2344 b->ht_size = 0; 2345 b->ht_flag = PETSC_FALSE; 2346 b->ht_fact = 0; 2347 b->ht_total_ct = 0; 2348 b->ht_insert_ct = 0; 2349 2350 /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */ 2351 b->ijonly = PETSC_FALSE; 2352 2353 b->in_loc = 0; 2354 b->v_loc = 0; 2355 b->n_loc = 0; 2356 2357 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ);CHKERRQ(ierr); 2358 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr); 2359 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr); 2360 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr); 2361 #if defined(PETSC_HAVE_ELEMENTAL) 2362 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_elemental_C",MatConvert_MPISBAIJ_Elemental);CHKERRQ(ierr); 2363 #endif 2364 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpiaij_C",MatConvert_MPISBAIJ_XAIJ);CHKERRQ(ierr); 2365 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpibaij_C",MatConvert_MPISBAIJ_XAIJ);CHKERRQ(ierr); 2366 2367 B->symmetric = PETSC_TRUE; 2368 B->structurally_symmetric = PETSC_TRUE; 2369 B->symmetric_set = PETSC_TRUE; 2370 B->structurally_symmetric_set = PETSC_TRUE; 2371 B->symmetric_eternal = PETSC_TRUE; 2372 2373 B->hermitian = PETSC_FALSE; 2374 B->hermitian_set = PETSC_FALSE; 2375 2376 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr); 2377 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr); 2378 ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",flg,&flg,NULL);CHKERRQ(ierr); 2379 if (flg) { 2380 PetscReal fact = 1.39; 2381 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 2382 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr); 2383 if (fact <= 1.0) fact = 1.39; 2384 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 2385 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 2386 } 2387 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2388 PetscFunctionReturn(0); 2389 } 2390 2391 /*MC 2392 MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices. 2393 2394 This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator, 2395 and MATMPISBAIJ otherwise. 2396 2397 Options Database Keys: 2398 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions() 2399 2400 Level: beginner 2401 2402 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ 2403 M*/ 2404 2405 /*@C 2406 MatMPISBAIJSetPreallocation - For good matrix assembly performance 2407 the user should preallocate the matrix storage by setting the parameters 2408 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2409 performance can be increased by more than a factor of 50. 2410 2411 Collective on Mat 2412 2413 Input Parameters: 2414 + B - the matrix 2415 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 2416 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 2417 . d_nz - number of block nonzeros per block row in diagonal portion of local 2418 submatrix (same for all local rows) 2419 . d_nnz - array containing the number of block nonzeros in the various block rows 2420 in the upper triangular and diagonal part of the in diagonal portion of the local 2421 (possibly different for each block row) or NULL. 
If you plan to factor the matrix you must leave room 2422 for the diagonal entry and set a value even if it is zero. 2423 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2424 submatrix (same for all local rows). 2425 - o_nnz - array containing the number of nonzeros in the various block rows of the 2426 off-diagonal portion of the local submatrix that is right of the diagonal 2427 (possibly different for each block row) or NULL. 2428 2429 2430 Options Database Keys: 2431 + -mat_no_unroll - uses code that does not unroll the loops in the 2432 block calculations (much slower) 2433 - -mat_block_size - size of the blocks to use 2434 2435 Notes: 2436 2437 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2438 than it must be used on all processors that share the object for that argument. 2439 2440 If the *_nnz parameter is given then the *_nz parameter is ignored 2441 2442 Storage Information: 2443 For a square global matrix we define each processor's diagonal portion 2444 to be its local rows and the corresponding columns (a square submatrix); 2445 each processor's off-diagonal portion encompasses the remainder of the 2446 local matrix (a rectangular submatrix). 2447 2448 The user can specify preallocated storage for the diagonal part of 2449 the local submatrix with either d_nz or d_nnz (not both). Set 2450 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 2451 memory allocation. Likewise, specify preallocated storage for the 2452 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2453 2454 You can call MatGetInfo() to get information on how effective the preallocation was; 2455 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 2456 You can also run with the option -info and look for messages with the string 2457 malloc in them to see if additional memory allocation was needed. 2458 2459 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2460 the figure below we depict these three local rows and all columns (0-11). 2461 2462 .vb 2463 0 1 2 3 4 5 6 7 8 9 10 11 2464 -------------------------- 2465 row 3 |. . . d d d o o o o o o 2466 row 4 |. . . d d d o o o o o o 2467 row 5 |. . . d d d o o o o o o 2468 -------------------------- 2469 .ve 2470 2471 Thus, any entries in the d locations are stored in the d (diagonal) 2472 submatrix, and any entries in the o locations are stored in the 2473 o (off-diagonal) submatrix. Note that the d matrix is stored in 2474 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 2475 2476 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 2477 plus the diagonal part of the d matrix, 2478 and o_nz should indicate the number of block nonzeros per row in the o matrix 2479 2480 In general, for PDE problems in which most nonzeros are near the diagonal, 2481 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2482 or you will get TERRIBLE performance; see the users' manual chapter on 2483 matrices. 
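   Example of use (an illustrative sketch only; the block size and per-block-row counts below are
   hypothetical and should be replaced by values appropriate for your problem):
.vb
   Mat      A;
   PetscInt bs = 2;
   PetscInt d_nnz[] = {2,1,1};   /* upper-triangular plus diagonal block nonzeros per local block row */
   PetscInt o_nnz[] = {1,1,0};   /* off-diagonal block nonzeros per local block row */

   MatCreate(PETSC_COMM_WORLD,&A);
   MatSetSizes(A,3*bs,3*bs,PETSC_DETERMINE,PETSC_DETERMINE);
   MatSetType(A,MATMPISBAIJ);
   MatMPISBAIJSetPreallocation(A,bs,0,d_nnz,0,o_nnz);
.ve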
2484 2485 Level: intermediate 2486 2487 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership() 2488 @*/ 2489 PetscErrorCode MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2490 { 2491 PetscErrorCode ierr; 2492 2493 PetscFunctionBegin; 2494 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2495 PetscValidType(B,1); 2496 PetscValidLogicalCollectiveInt(B,bs,2); 2497 ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 2498 PetscFunctionReturn(0); 2499 } 2500 2501 /*@C 2502 MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format 2503 (block compressed row). For good matrix assembly performance 2504 the user should preallocate the matrix storage by setting the parameters 2505 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2506 performance can be increased by more than a factor of 50. 2507 2508 Collective 2509 2510 Input Parameters: 2511 + comm - MPI communicator 2512 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 2513 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 2514 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2515 This value should be the same as the local size used in creating the 2516 y vector for the matrix-vector product y = Ax. 2517 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 2518 This value should be the same as the local size used in creating the 2519 x vector for the matrix-vector product y = Ax. 2520 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2521 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2522 . d_nz - number of block nonzeros per block row in diagonal portion of local 2523 submatrix (same for all local rows) 2524 . d_nnz - array containing the number of block nonzeros in the various block rows 2525 in the upper triangular portion of the in diagonal portion of the local 2526 (possibly different for each block block row) or NULL. 2527 If you plan to factor the matrix you must leave room for the diagonal entry and 2528 set its value even if it is zero. 2529 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2530 submatrix (same for all local rows). 2531 - o_nnz - array containing the number of nonzeros in the various block rows of the 2532 off-diagonal portion of the local submatrix (possibly different for 2533 each block row) or NULL. 2534 2535 Output Parameter: 2536 . A - the matrix 2537 2538 Options Database Keys: 2539 + -mat_no_unroll - uses code that does not unroll the loops in the 2540 block calculations (much slower) 2541 . -mat_block_size - size of the blocks to use 2542 - -mat_mpi - use the parallel matrix data structures even on one processor 2543 (defaults to using SeqBAIJ format on one processor) 2544 2545 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 2546 MatXXXXSetPreallocation() paradigm instead of this routine directly. 2547 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 2548 2549 Notes: 2550 The number of rows and columns must be divisible by blocksize. 
2551 This matrix type does not support complex Hermitian operation. 2552 2553 The user MUST specify either the local or global matrix dimensions 2554 (possibly both). 2555 2556 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2557 than it must be used on all processors that share the object for that argument. 2558 2559 If the *_nnz parameter is given then the *_nz parameter is ignored 2560 2561 Storage Information: 2562 For a square global matrix we define each processor's diagonal portion 2563 to be its local rows and the corresponding columns (a square submatrix); 2564 each processor's off-diagonal portion encompasses the remainder of the 2565 local matrix (a rectangular submatrix). 2566 2567 The user can specify preallocated storage for the diagonal part of 2568 the local submatrix with either d_nz or d_nnz (not both). Set 2569 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 2570 memory allocation. Likewise, specify preallocated storage for the 2571 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2572 2573 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2574 the figure below we depict these three local rows and all columns (0-11). 2575 2576 .vb 2577 0 1 2 3 4 5 6 7 8 9 10 11 2578 -------------------------- 2579 row 3 |. . . d d d o o o o o o 2580 row 4 |. . . d d d o o o o o o 2581 row 5 |. . . d d d o o o o o o 2582 -------------------------- 2583 .ve 2584 2585 Thus, any entries in the d locations are stored in the d (diagonal) 2586 submatrix, and any entries in the o locations are stored in the 2587 o (off-diagonal) submatrix. Note that the d matrix is stored in 2588 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 2589 2590 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 2591 plus the diagonal part of the d matrix, 2592 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2593 In general, for PDE problems in which most nonzeros are near the diagonal, 2594 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2595 or you will get TERRIBLE performance; see the users' manual chapter on 2596 matrices. 
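   Example of use (an illustrative sketch; the communicator, sizes, and nonzero estimates are
   hypothetical):
.vb
   Mat A;

   /* 2x2 blocks, 100x100 global matrix, roughly 3 diagonal-portion and 1 off-diagonal
      block nonzeros per block row */
   MatCreateSBAIJ(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DECIDE,100,100,3,NULL,1,NULL,&A);
.ve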
2597 2598 Level: intermediate 2599 2600 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ() 2601 @*/ 2602 2603 PetscErrorCode MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2604 { 2605 PetscErrorCode ierr; 2606 PetscMPIInt size; 2607 2608 PetscFunctionBegin; 2609 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2610 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2611 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2612 if (size > 1) { 2613 ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr); 2614 ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2615 } else { 2616 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 2617 ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2618 } 2619 PetscFunctionReturn(0); 2620 } 2621 2622 2623 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2624 { 2625 Mat mat; 2626 Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data; 2627 PetscErrorCode ierr; 2628 PetscInt len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs; 2629 PetscScalar *array; 2630 2631 PetscFunctionBegin; 2632 *newmat = 0; 2633 2634 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2635 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2636 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2637 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2638 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2639 2640 mat->factortype = matin->factortype; 2641 mat->preallocated = PETSC_TRUE; 2642 mat->assembled = PETSC_TRUE; 2643 mat->insertmode = NOT_SET_VALUES; 2644 2645 a = (Mat_MPISBAIJ*)mat->data; 2646 a->bs2 = oldmat->bs2; 2647 a->mbs = oldmat->mbs; 2648 a->nbs = oldmat->nbs; 2649 a->Mbs = oldmat->Mbs; 2650 a->Nbs = oldmat->Nbs; 2651 2652 a->size = oldmat->size; 2653 a->rank = oldmat->rank; 2654 a->donotstash = oldmat->donotstash; 2655 a->roworiented = oldmat->roworiented; 2656 a->rowindices = 0; 2657 a->rowvalues = 0; 2658 a->getrowactive = PETSC_FALSE; 2659 a->barray = 0; 2660 a->rstartbs = oldmat->rstartbs; 2661 a->rendbs = oldmat->rendbs; 2662 a->cstartbs = oldmat->cstartbs; 2663 a->cendbs = oldmat->cendbs; 2664 2665 /* hash table stuff */ 2666 a->ht = 0; 2667 a->hd = 0; 2668 a->ht_size = 0; 2669 a->ht_flag = oldmat->ht_flag; 2670 a->ht_fact = oldmat->ht_fact; 2671 a->ht_total_ct = 0; 2672 a->ht_insert_ct = 0; 2673 2674 ierr = PetscArraycpy(a->rangebs,oldmat->rangebs,a->size+2);CHKERRQ(ierr); 2675 if (oldmat->colmap) { 2676 #if defined(PETSC_USE_CTABLE) 2677 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2678 #else 2679 ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr); 2680 ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2681 ierr = PetscArraycpy(a->colmap,oldmat->colmap,a->Nbs);CHKERRQ(ierr); 2682 #endif 2683 } else a->colmap = 0; 2684 2685 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2686 ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr); 2687 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2688 ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); 2689 } else a->garray = 0; 2690 2691 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 2692 ierr = 
VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2693 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2694 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2695 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2696 2697 ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr); 2698 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr); 2699 ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr); 2700 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr); 2701 2702 ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr); 2703 ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr); 2704 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr); 2705 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr); 2706 ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr); 2707 ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr); 2708 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr); 2709 ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr); 2710 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr); 2711 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr); 2712 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b);CHKERRQ(ierr); 2713 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a);CHKERRQ(ierr); 2714 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b);CHKERRQ(ierr); 2715 2716 /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */ 2717 ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr); 2718 a->sMvctx = oldmat->sMvctx; 2719 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx);CHKERRQ(ierr); 2720 2721 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2722 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2723 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2724 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2725 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2726 *newmat = mat; 2727 PetscFunctionReturn(0); 2728 } 2729 2730 PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer) 2731 { 2732 PetscErrorCode ierr; 2733 PetscInt i,nz,j,rstart,rend; 2734 PetscScalar *vals,*buf; 2735 MPI_Comm comm; 2736 MPI_Status status; 2737 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,mmbs; 2738 PetscInt header[4],*rowlengths = 0,M,N,m,*cols,*locrowlens; 2739 PetscInt *procsnz = 0,jj,*mycols,*ibuf; 2740 PetscInt bs = newmat->rmap->bs,Mbs,mbs,extra_rows; 2741 PetscInt *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2742 PetscInt dcount,kmax,k,nzcount,tmp; 2743 int fd; 2744 PetscBool isbinary; 2745 2746 PetscFunctionBegin; 2747 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 2748 if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)newmat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newmat)->type_name); 2749 2750 /* force binary viewer to load .info file if it has not yet done so */ 2751 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2752 ierr = 
PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 2753 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr); 2754 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 2755 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2756 if (bs < 0) bs = 1; 2757 2758 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2759 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2760 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2761 if (!rank) { 2762 ierr = PetscBinaryRead(fd,(char*)header,4,NULL,PETSC_INT);CHKERRQ(ierr); 2763 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2764 if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newmat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ"); 2765 } 2766 2767 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2768 M = header[1]; 2769 N = header[2]; 2770 2771 /* If global sizes are set, check if they are consistent with that given in the file */ 2772 if (newmat->rmap->N >= 0 && newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newmat->rmap->N,M); 2773 if (newmat->cmap->N >= 0 && newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newmat->cmap->N,N); 2774 2775 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 2776 2777 /* 2778 This code adds extra rows to make sure the number of rows is 2779 divisible by the blocksize 2780 */ 2781 Mbs = M/bs; 2782 extra_rows = bs - M + bs*(Mbs); 2783 if (extra_rows == bs) extra_rows = 0; 2784 else Mbs++; 2785 if (extra_rows &&!rank) { 2786 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 2787 } 2788 2789 /* determine ownership of all rows */ 2790 if (newmat->rmap->n < 0) { /* PETSC_DECIDE */ 2791 mbs = Mbs/size + ((Mbs % size) > rank); 2792 m = mbs*bs; 2793 } else { /* User Set */ 2794 m = newmat->rmap->n; 2795 mbs = m/bs; 2796 } 2797 ierr = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr); 2798 ierr = PetscMPIIntCast(mbs,&mmbs);CHKERRQ(ierr); 2799 ierr = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2800 rowners[0] = 0; 2801 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2802 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2803 rstart = rowners[rank]; 2804 rend = rowners[rank+1]; 2805 2806 /* distribute row lengths to all processors */ 2807 ierr = PetscMalloc1((rend-rstart)*bs,&locrowlens);CHKERRQ(ierr); 2808 if (!rank) { 2809 ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr); 2810 ierr = PetscBinaryRead(fd,rowlengths,M,NULL,PETSC_INT);CHKERRQ(ierr); 2811 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2812 ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr); 2813 for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2814 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2815 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2816 } else { 2817 ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2818 } 2819 2820 if (!rank) { /* procs[0] */ 2821 /* calculate the number of nonzeros on each processor */ 2822 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 2823 for 
(i=0; i<size; i++) { 2824 for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 2825 procsnz[i] += rowlengths[j]; 2826 } 2827 } 2828 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2829 2830 /* determine max buffer needed and allocate it */ 2831 maxnz = 0; 2832 for (i=0; i<size; i++) { 2833 maxnz = PetscMax(maxnz,procsnz[i]); 2834 } 2835 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 2836 2837 /* read in my part of the matrix column indices */ 2838 nz = procsnz[0]; 2839 ierr = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr); 2840 mycols = ibuf; 2841 if (size == 1) nz -= extra_rows; 2842 ierr = PetscBinaryRead(fd,mycols,nz,NULL,PETSC_INT);CHKERRQ(ierr); 2843 if (size == 1) { 2844 for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i; 2845 } 2846 2847 /* read in every ones (except the last) and ship off */ 2848 for (i=1; i<size-1; i++) { 2849 nz = procsnz[i]; 2850 ierr = PetscBinaryRead(fd,cols,nz,NULL,PETSC_INT);CHKERRQ(ierr); 2851 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2852 } 2853 /* read in the stuff for the last proc */ 2854 if (size != 1) { 2855 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2856 ierr = PetscBinaryRead(fd,cols,nz,NULL,PETSC_INT);CHKERRQ(ierr); 2857 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2858 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 2859 } 2860 ierr = PetscFree(cols);CHKERRQ(ierr); 2861 } else { /* procs[i], i>0 */ 2862 /* determine buffer space needed for message */ 2863 nz = 0; 2864 for (i=0; i<m; i++) nz += locrowlens[i]; 2865 ierr = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr); 2866 mycols = ibuf; 2867 /* receive message of column indices*/ 2868 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2869 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2870 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2871 } 2872 2873 /* loop over local rows, determining number of off diagonal entries */ 2874 ierr = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr); 2875 ierr = PetscCalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr); 2876 rowcount = 0; 2877 nzcount = 0; 2878 for (i=0; i<mbs; i++) { 2879 dcount = 0; 2880 odcount = 0; 2881 for (j=0; j<bs; j++) { 2882 kmax = locrowlens[rowcount]; 2883 for (k=0; k<kmax; k++) { 2884 tmp = mycols[nzcount++]/bs; /* block col. 
index */ 2885 if (!mask[tmp]) { 2886 mask[tmp] = 1; 2887 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */ 2888 else masked1[dcount++] = tmp; /* entry in diag portion */ 2889 } 2890 } 2891 rowcount++; 2892 } 2893 2894 dlens[i] = dcount; /* d_nzz[i] */ 2895 odlens[i] = odcount; /* o_nzz[i] */ 2896 2897 /* zero out the mask elements we set */ 2898 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2899 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2900 } 2901 ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 2902 ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr); 2903 ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 2904 2905 if (!rank) { 2906 ierr = PetscMalloc1(maxnz,&buf);CHKERRQ(ierr); 2907 /* read in my part of the matrix numerical values */ 2908 nz = procsnz[0]; 2909 vals = buf; 2910 mycols = ibuf; 2911 if (size == 1) nz -= extra_rows; 2912 ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr); 2913 if (size == 1) { 2914 for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0; 2915 } 2916 2917 /* insert into matrix */ 2918 jj = rstart*bs; 2919 for (i=0; i<m; i++) { 2920 ierr = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2921 mycols += locrowlens[i]; 2922 vals += locrowlens[i]; 2923 jj++; 2924 } 2925 2926 /* read in other processors (except the last one) and ship out */ 2927 for (i=1; i<size-1; i++) { 2928 nz = procsnz[i]; 2929 vals = buf; 2930 ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr); 2931 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 2932 } 2933 /* the last proc */ 2934 if (size != 1) { 2935 nz = procsnz[i] - extra_rows; 2936 vals = buf; 2937 ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr); 2938 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2939 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 2940 } 2941 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2942 2943 } else { 2944 /* receive numeric values */ 2945 ierr = PetscMalloc1(nz,&buf);CHKERRQ(ierr); 2946 2947 /* receive message of values*/ 2948 vals = buf; 2949 mycols = ibuf; 2950 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr); 2951 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2952 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2953 2954 /* insert into matrix */ 2955 jj = rstart*bs; 2956 for (i=0; i<m; i++) { 2957 ierr = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2958 mycols += locrowlens[i]; 2959 vals += locrowlens[i]; 2960 jj++; 2961 } 2962 } 2963 2964 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2965 ierr = PetscFree(buf);CHKERRQ(ierr); 2966 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2967 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 2968 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 2969 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 2970 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2971 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2972 PetscFunctionReturn(0); 2973 } 2974 2975 /*XXXXX@ 2976 MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2977 2978 Input Parameters: 2979 . mat - the matrix 2980 . 

PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
  PetscReal      atmp;
  PetscReal      *work,*svalues,*rvalues;
  PetscErrorCode ierr;
  PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
  PetscMPIInt    rank,size;
  PetscInt       *rowners_bs,dest,count,source;
  PetscScalar    *va;
  MatScalar      *ba;
  MPI_Status     stat;

  PetscFunctionBegin;
  if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
  ierr = MatGetRowMaxAbs(a->A,v,NULL);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);

  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);

  bs  = A->rmap->bs;
  mbs = a->mbs;
  Mbs = a->Mbs;
  ba  = b->a;
  bi  = b->i;
  bj  = b->j;

  /* find ownerships */
  rowners_bs = A->rmap->range;

  /* each proc creates an array to be distributed */
  ierr = PetscCalloc1(bs*Mbs,&work);CHKERRQ(ierr);

  /* row_max for B */
  if (rank != size-1) {
    for (i=0; i<mbs; i++) {
      ncols = bi[1] - bi[0]; bi++;
      brow  = bs*i;
      for (j=0; j<ncols; j++) {
        bcol = bs*(*bj);
        for (kcol=0; kcol<bs; kcol++) {
          col  = bcol + kcol;        /* local col index */
          col += rowners_bs[rank+1]; /* global col index */
          for (krow=0; krow<bs; krow++) {
            atmp = PetscAbsScalar(*ba); ba++;
            row  = brow + krow;      /* local row index */
            if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
            if (work[col] < atmp) work[col] = atmp;
          }
        }
        bj++;
      }
    }

    /* send values to their owners */
    for (dest=rank+1; dest<size; dest++) {
      svalues = work + rowners_bs[dest];
      count   = rowners_bs[dest+1] - rowners_bs[dest];
      ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
    }
  }

  /* receive values */
  if (rank) {
    rvalues = work;
    count   = rowners_bs[rank+1] - rowners_bs[rank];
    for (source=0; source<rank; source++) {
      ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat);CHKERRQ(ierr);
      /* process values */
      for (i=0; i<count; i++) {
        if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
      }
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = PetscFree(work);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
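
/*
   Note (added): the SOR kernels below rebuild the symmetric matrix action from the upper
   triangular storage.  MatSOR_MPISBAIJ uses the merged scatter sMvctx with the work vectors
   slvec0/slvec1 (whose "a" parts hold local values and "b" parts hold ghost values), so the
   B and B^T contributions travel in a single communication; MatSOR_MPISBAIJ_2comm further below
   keeps the older two-scatter scheme through Mvctx.
*/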

PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)matin->data;
  PetscErrorCode    ierr;
  PetscInt          mbs=mat->mbs,bs=matin->rmap->bs;
  PetscScalar       *x,*ptr,*from;
  Vec               bb1;
  const PetscScalar *b;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
  if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");

  if (flag == SOR_APPLY_UPPER) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
    while (its--) {
      /* lower triangular part: slvec0b = - B^T*xx */
      ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);

      /* copy xx into slvec0a */
      ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
      ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
      ierr = PetscArraycpy(ptr,x,bs*mbs);CHKERRQ(ierr);
      ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);

      ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);

      /* copy bb into slvec1a */
      ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
      ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = PetscArraycpy(ptr,b,bs*mbs);CHKERRQ(ierr);
      ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);

      /* set slvec1b = 0 */
      ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);

      ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
      ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* upper triangular part: bb1 = bb1 - B*x */
      ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);

      /* local diagonal sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
    ierr = VecDestroy(&bb1);CHKERRQ(ierr);
  } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
  } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
  } else if (flag & SOR_EISENSTAT) {
    Vec               xx1;
    PetscBool         hasop;
    const PetscScalar *diag;
    PetscScalar       *sl,scale = (omega - 2.0)/omega;
    PetscInt          i,n;

    if (!mat->xx1) {
      ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr);
      ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr);
    }
    xx1 = mat->xx1;
    bb1 = mat->bb1;

    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);

    if (!mat->diag) {
      /* this is wrong for the same matrix with new nonzero values */
      ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
      ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
    }
    ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);

    if (hasop) {
      ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
      ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
    } else {
      /*
        These two lines are replaced by code that may be a bit faster for a good compiler
        ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr);
        ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
      */
      ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr);
      ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr);
      ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
      ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr);
      if (omega == 1.0) {
        for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i];
        ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr);
      } else {
        for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i];
        ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr);
      }
      ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr);
      ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr);
      ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
    }

    /* multiply off-diagonal portion of matrix */
    ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
    ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
    ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr);
    ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
    ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr);
    ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr);
    ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
    ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr);

    /* local sweep */
    ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
    ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
  PetscFunctionReturn(0);
}
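
/*
   Note (added): MatSOR_MPISBAIJ_2comm performs the same local symmetric sweeps as above but
   communicates in two steps through the conventional scatter Mvctx: a forward scatter of xx into
   lvec and a reverse ADD_VALUES scatter of the B^T contribution back into bb1.
*/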

PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            lvec1,bb1;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
  if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* lower triangular part: bb1 = bb - B^T*xx */
      ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
      ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);

      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
      ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

      /* upper triangular part: bb1 = bb1 - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);

      ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

      /* local diagonal sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
    ierr = VecDestroy(&lvec1);CHKERRQ(ierr);
    ierr = VecDestroy(&bb1);CHKERRQ(ierr);
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
  PetscFunctionReturn(0);
}

/*@
   MatCreateMPISBAIJWithArrays - creates an MPI SBAIJ matrix using arrays that contain the local rows
   in standard CSR format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  bs - the block size; only a block size of 1 is supported
.  m - number of local rows (cannot be PETSC_DECIDE)
.  n - number of local columns. This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it calculated if N is given).
       For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  i - row indices; that is, i[0] = 0 and i[row] = i[row-1] + number of block elements in that block row of the matrix
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
   The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
   thus you CANNOT change the matrix entries by changing the values of a[] after you have
   called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

   The i and j indices are 0 based, and the i indices index into the local j array.

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr);
  ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
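
/*
   Usage sketch (added; the values are illustrative only): assemble a 3x3 symmetric tridiagonal
   matrix on one process with block size 1, passing only the upper triangular entries of each row
   in CSR form.

     PetscInt    ia[] = {0,2,4,5};      (row pointers)
     PetscInt    ja[] = {0,1,1,2,2};    (column indices, upper triangle)
     PetscScalar va[] = {2,-1,2,-1,2};  (values)
     Mat         S;

     ierr = MatCreateMPISBAIJWithArrays(PETSC_COMM_WORLD,1,3,3,PETSC_DETERMINE,PETSC_DETERMINE,ia,ja,va,&S);CHKERRQ(ierr);
*/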

/*@C
   MatMPISBAIJSetPreallocationCSR - Creates a sparse parallel matrix in SBAIJ format using the given nonzero structure and (optional) numerical values

   Collective

   Input Parameters:
+  B - the matrix
.  bs - the block size
.  i - the indices into j for the start of each local row (starts with zero)
.  j - the column indices for each local row (starts with zero); these must be sorted for each row
-  v - optional values in the matrix

   Level: advanced

   Notes:
   Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries
   and usually the numerical values as well.

   Any entries below the diagonal are ignored.

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ
@*/
PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
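
/*
   Note (added): MatCreateMPIMatConcatenateSeqMat_MPISBAIJ stacks the sequential SBAIJ matrix held
   by each process into one parallel SBAIJ matrix.  The symbolic phase (MAT_INITIAL_MATRIX) counts
   diagonal and off-diagonal block nonzeros per local block row with the MatPreallocate* helpers
   and creates the output matrix; the numeric phase re-reads every local row and inserts it at its
   shifted global row index.
*/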

PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;
  PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
  PetscInt       *indx;
  PetscScalar    *values;

  PetscFunctionBegin;
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inmat->data;
    PetscInt     *dnz,*onz,mbs,Nbs,nbs;
    PetscInt     *bindx,rmax=a->rmax,j;
    PetscMPIInt  rank,size;

    ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
    mbs = m/bs; Nbs = N/cbs;
    if (n == PETSC_DECIDE) {
      ierr = PetscSplitOwnershipBlock(comm,cbs,&n,&N);CHKERRQ(ierr);
    }
    nbs = n/cbs;

    ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr);
    ierr = MatPreallocateInitialize(comm,mbs,nbs,dnz,onz);CHKERRQ(ierr); /* inline function; its outputs __end and __rstart are used below */

    ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
    ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
    if (rank == size-1) {
      /* Check sum(nbs) = Nbs */
      if (__end != Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local block columns %D != global block columns %D",__end,Nbs);
    }

    rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateInitialize */
    ierr   = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=0; i<mbs; i++) {
      ierr = MatGetRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */
      nnz  = nnz/bs;
      for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
      ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr);
      ierr = MatRestoreRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr);
    }
    ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscFree(bindx);CHKERRQ(ierr);

    ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
    ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
    ierr = MatSetType(*outmat,MATSBAIJ);CHKERRQ(ierr);
    ierr = MatSeqSBAIJSetPreallocation(*outmat,bs,0,dnz);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr);
    ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  }

  /* numeric phase */
  ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);

  ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ierr = MatGetRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
    Ii   = i + rstart;
    ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
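
/*
   Usage sketch (added): this routine is normally reached through the generic front end
   MatCreateMPIMatConcatenateSeqMat(), assumed here to take the same argument list as the routine
   above; seqsbaij stands for a previously assembled MATSEQSBAIJ matrix on each process and n may
   be PETSC_DECIDE.

     Mat C;
     ierr = MatCreateMPIMatConcatenateSeqMat(PETSC_COMM_WORLD,seqsbaij,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
*/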