1 2 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/ 3 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 4 #include <../src/mat/impls/sbaij/seq/sbaij.h> 5 #include <petscblaslapack.h> 6 7 #if defined(PETSC_HAVE_ELEMENTAL) 8 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 9 #endif 10 11 /* This could be moved to matimpl.h */ 12 static PetscErrorCode MatPreallocateWithMats_Private(Mat B, PetscInt nm, Mat X[], PetscBool symm[], PetscBool fill) 13 { 14 Mat preallocator; 15 PetscInt r,rstart,rend; 16 PetscInt bs,i,m,n,M,N; 17 PetscBool cong = PETSC_TRUE; 18 PetscErrorCode ierr; 19 20 PetscFunctionBegin; 21 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 22 PetscValidLogicalCollectiveInt(B,nm,2); 23 for (i = 0; i < nm; i++) { 24 PetscValidHeaderSpecific(X[i],MAT_CLASSID,3); 25 ierr = PetscLayoutCompare(B->rmap,X[i]->rmap,&cong);CHKERRQ(ierr); 26 if (!cong) SETERRQ(PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"Not for different layouts"); 27 } 28 PetscValidLogicalCollectiveBool(B,fill,5); 29 ierr = MatGetBlockSize(B,&bs);CHKERRQ(ierr); 30 ierr = MatGetSize(B,&M,&N);CHKERRQ(ierr); 31 ierr = MatGetLocalSize(B,&m,&n);CHKERRQ(ierr); 32 ierr = MatCreate(PetscObjectComm((PetscObject)B),&preallocator);CHKERRQ(ierr); 33 ierr = MatSetType(preallocator,MATPREALLOCATOR);CHKERRQ(ierr); 34 ierr = MatSetBlockSize(preallocator,bs);CHKERRQ(ierr); 35 ierr = MatSetSizes(preallocator,m,n,M,N);CHKERRQ(ierr); 36 ierr = MatSetUp(preallocator);CHKERRQ(ierr); 37 ierr = MatGetOwnershipRange(preallocator,&rstart,&rend);CHKERRQ(ierr); 38 for (r = rstart; r < rend; ++r) { 39 PetscInt ncols; 40 const PetscInt *row; 41 const PetscScalar *vals; 42 43 for (i = 0; i < nm; i++) { 44 ierr = MatGetRow(X[i],r,&ncols,&row,&vals);CHKERRQ(ierr); 45 ierr = MatSetValues(preallocator,1,&r,ncols,row,vals,INSERT_VALUES);CHKERRQ(ierr); 46 if (symm && symm[i]) { 47 ierr = MatSetValues(preallocator,ncols,row,1,&r,vals,INSERT_VALUES);CHKERRQ(ierr); 48 } 49 ierr = MatRestoreRow(X[i],r,&ncols,&row,&vals);CHKERRQ(ierr); 50 } 51 } 52 ierr = MatAssemblyBegin(preallocator,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 53 ierr = MatAssemblyEnd(preallocator,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 54 ierr = MatPreallocatorPreallocate(preallocator,fill,B);CHKERRQ(ierr); 55 ierr = MatDestroy(&preallocator);CHKERRQ(ierr); 56 PetscFunctionReturn(0); 57 } 58 59 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Basic(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 60 { 61 Mat B; 62 PetscErrorCode ierr; 63 PetscInt r; 64 65 PetscFunctionBegin; 66 if (reuse != MAT_REUSE_MATRIX) { 67 PetscBool symm = PETSC_TRUE,isdense; 68 PetscInt bs; 69 70 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 71 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 72 ierr = MatSetType(B,newtype);CHKERRQ(ierr); 73 ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr); 74 ierr = MatSetBlockSize(B,bs);CHKERRQ(ierr); 75 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 76 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 77 ierr = PetscObjectTypeCompareAny((PetscObject)B,&isdense,MATSEQDENSE,MATMPIDENSE,MATSEQDENSECUDA,"");CHKERRQ(ierr); 78 if (!isdense) { 79 ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr); 80 ierr = MatPreallocateWithMats_Private(B,1,&A,&symm,PETSC_TRUE);CHKERRQ(ierr); 81 ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr); 82 } else { 83 ierr = MatSetUp(B);CHKERRQ(ierr); 84 } 85 } else { 86 B = *newmat; 87 ierr = MatZeroEntries(B);CHKERRQ(ierr); 88 } 89 90 ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr); 91 for (r = A->rmap->rstart; r < A->rmap->rend; r++) { 92 PetscInt ncols; 93 const PetscInt *row; 94 const PetscScalar *vals; 95 96 ierr = MatGetRow(A,r,&ncols,&row,&vals);CHKERRQ(ierr); 97 ierr = MatSetValues(B,1,&r,ncols,row,vals,INSERT_VALUES);CHKERRQ(ierr); 98 ierr = MatSetValues(B,ncols,row,1,&r,vals,INSERT_VALUES);CHKERRQ(ierr); 99 ierr = MatRestoreRow(A,r,&ncols,&row,&vals);CHKERRQ(ierr); 100 } 101 ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr); 102 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 103 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 104 105 if (reuse == MAT_INPLACE_MATRIX) { 106 ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr); 107 } else { 108 *newmat = B; 109 } 110 PetscFunctionReturn(0); 111 } 112 113 PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat) 114 { 115 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ*)mat->data; 116 PetscErrorCode ierr; 117 118 PetscFunctionBegin; 119 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 120 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 121 PetscFunctionReturn(0); 122 } 123 124 PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat) 125 { 126 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ*)mat->data; 127 PetscErrorCode ierr; 128 129 PetscFunctionBegin; 130 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 131 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 132 PetscFunctionReturn(0); 133 } 134 135 #define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,orow,ocol) \ 136 { \ 137 brow = row/bs; \ 138 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 139 rmax = aimax[brow]; nrow = ailen[brow]; \ 140 bcol = col/bs; \ 141 ridx = row % bs; cidx = col % bs; \ 142 low = 0; high = nrow; \ 143 while (high-low > 3) { \ 144 t = (low+high)/2; \ 145 if (rp[t] > bcol) high = t; \ 146 else low = t; \ 147 } \ 148 for (_i=low; _i<high; _i++) { \ 149 if (rp[_i] > bcol) break; \ 150 if (rp[_i] == bcol) { \ 151 bap = ap + bs2*_i + bs*cidx + ridx; \ 152 if (addv == ADD_VALUES) *bap += value; \ 153 else *bap = value; \ 154 goto a_noinsert; \ 155 } \ 156 } \ 157 if (a->nonew == 1) goto a_noinsert; \ 158 if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 159 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 160 N = nrow++ - 1; \ 161 /* shift up all the later entries in this row */ \ 162 ierr = PetscArraymove(rp+_i+1,rp+_i,N-_i+1);CHKERRQ(ierr); \ 163 ierr = PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1));CHKERRQ(ierr); \ 164 ierr = PetscArrayzero(ap+bs2*_i,bs2);CHKERRQ(ierr); \ 165 rp[_i] = bcol; \ 166 ap[bs2*_i + bs*cidx + ridx] = value; \ 167 A->nonzerostate++;\ 168 a_noinsert:; \ 169 ailen[brow] = nrow; \ 170 } 171 172 #define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,orow,ocol) \ 173 { \ 174 brow = row/bs; \ 175 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 176 rmax = bimax[brow]; nrow = bilen[brow]; \ 177 bcol = col/bs; \ 178 ridx = row % bs; cidx = col % bs; \ 179 low = 0; high = nrow; \ 180 while (high-low > 3) { \ 181 t = (low+high)/2; \ 182 if (rp[t] > bcol) high = t; \ 183 else low = t; \ 184 } \ 185 for (_i=low; _i<high; _i++) { \ 186 if (rp[_i] > bcol) break; \ 187 if (rp[_i] == bcol) { \ 188 bap = ap + bs2*_i + bs*cidx + ridx; \ 189 if (addv == ADD_VALUES) *bap += value; \ 190 else *bap = value; \ 191 goto b_noinsert; \ 192 } \ 193 } \ 194 if (b->nonew == 1) goto b_noinsert; \ 195 if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 196 MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 197 N = nrow++ - 1; \ 198 /* shift up all the later entries in this row */ \ 199 ierr = PetscArraymove(rp+_i+1,rp+_i,N-_i+1);CHKERRQ(ierr); \ 200 ierr = PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1));CHKERRQ(ierr); \ 201 ierr = PetscArrayzero(ap+bs2*_i,bs2);CHKERRQ(ierr); \ 202 rp[_i] = bcol; \ 203 ap[bs2*_i + bs*cidx + ridx] = value; \ 204 B->nonzerostate++;\ 205 b_noinsert:; \ 206 bilen[brow] = nrow; \ 207 } 208 209 /* Only add/insert a(i,j) with i<=j (blocks). 210 Any a(i,j) with i>j input by user is ingored or generates an error 211 */ 212 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 213 { 214 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 215 MatScalar value; 216 PetscBool roworiented = baij->roworiented; 217 PetscErrorCode ierr; 218 PetscInt i,j,row,col; 219 PetscInt rstart_orig=mat->rmap->rstart; 220 PetscInt rend_orig =mat->rmap->rend,cstart_orig=mat->cmap->rstart; 221 PetscInt cend_orig =mat->cmap->rend,bs=mat->rmap->bs; 222 223 /* Some Variables required in the macro */ 224 Mat A = baij->A; 225 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data; 226 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 227 MatScalar *aa =a->a; 228 229 Mat B = baij->B; 230 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 231 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 232 MatScalar *ba =b->a; 233 234 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 235 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 236 MatScalar *ap,*bap; 237 238 /* for stash */ 239 PetscInt n_loc, *in_loc = NULL; 240 MatScalar *v_loc = NULL; 241 242 PetscFunctionBegin; 243 if (!baij->donotstash) { 244 if (n > baij->n_loc) { 245 ierr = PetscFree(baij->in_loc);CHKERRQ(ierr); 246 ierr = PetscFree(baij->v_loc);CHKERRQ(ierr); 247 ierr = PetscMalloc1(n,&baij->in_loc);CHKERRQ(ierr); 248 ierr = PetscMalloc1(n,&baij->v_loc);CHKERRQ(ierr); 249 250 baij->n_loc = n; 251 } 252 in_loc = baij->in_loc; 253 v_loc = baij->v_loc; 254 } 255 256 for (i=0; i<m; i++) { 257 if (im[i] < 0) continue; 258 #if defined(PETSC_USE_DEBUG) 259 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 260 #endif 261 if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */ 262 row = im[i] - rstart_orig; /* local row index */ 263 for (j=0; j<n; j++) { 264 if (im[i]/bs > in[j]/bs) { 265 if (a->ignore_ltriangular) { 266 continue; /* ignore lower triangular blocks */ 267 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 268 } 269 if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */ 270 col = in[j] - cstart_orig; /* local col index */ 271 brow = row/bs; bcol = col/bs; 272 if (brow > bcol) continue; /* ignore lower triangular blocks of A */ 273 if (roworiented) value = v[i*n+j]; 274 else value = v[i+j*m]; 275 MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,im[i],in[j]); 276 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 277 } else if (in[j] < 0) continue; 278 #if defined(PETSC_USE_DEBUG) 279 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 280 #endif 281 else { /* off-diag entry (B) */ 282 if (mat->was_assembled) { 283 if (!baij->colmap) { 284 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 285 } 286 #if defined(PETSC_USE_CTABLE) 287 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 288 col = col - 1; 289 #else 290 col = baij->colmap[in[j]/bs] - 1; 291 #endif 292 if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) { 293 ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 294 col = in[j]; 295 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 296 B = baij->B; 297 b = (Mat_SeqBAIJ*)(B)->data; 298 bimax= b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 299 ba = b->a; 300 } else col += in[j]%bs; 301 } else col = in[j]; 302 if (roworiented) value = v[i*n+j]; 303 else value = v[i+j*m]; 304 MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,im[i],in[j]); 305 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 306 } 307 } 308 } else { /* off processor entry */ 309 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 310 if (!baij->donotstash) { 311 mat->assembled = PETSC_FALSE; 312 n_loc = 0; 313 for (j=0; j<n; j++) { 314 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 315 in_loc[n_loc] = in[j]; 316 if (roworiented) { 317 v_loc[n_loc] = v[i*n+j]; 318 } else { 319 v_loc[n_loc] = v[j*m+i]; 320 } 321 n_loc++; 322 } 323 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr); 324 } 325 } 326 } 327 PetscFunctionReturn(0); 328 } 329 330 PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol) 331 { 332 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 333 PetscErrorCode ierr; 334 PetscInt *rp,low,high,t,ii,jj,nrow,i,rmax,N; 335 PetscInt *imax =a->imax,*ai=a->i,*ailen=a->ilen; 336 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs; 337 PetscBool roworiented=a->roworiented; 338 const PetscScalar *value = v; 339 MatScalar *ap,*aa = a->a,*bap; 340 341 PetscFunctionBegin; 342 if (col < row) { 343 if (a->ignore_ltriangular) PetscFunctionReturn(0); /* ignore lower triangular block */ 344 else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 345 } 346 rp = aj + ai[row]; 347 ap = aa + bs2*ai[row]; 348 rmax = imax[row]; 349 nrow = ailen[row]; 350 value = v; 351 low = 0; 352 high = nrow; 353 354 while (high-low > 7) { 355 t = (low+high)/2; 356 if (rp[t] > col) high = t; 357 else low = t; 358 } 359 for (i=low; i<high; i++) { 360 if (rp[i] > col) break; 361 if (rp[i] == col) { 362 bap = ap + bs2*i; 363 if (roworiented) { 364 if (is == ADD_VALUES) { 365 for (ii=0; ii<bs; ii++) { 366 for (jj=ii; jj<bs2; jj+=bs) { 367 bap[jj] += *value++; 368 } 369 } 370 } else { 371 for (ii=0; ii<bs; ii++) { 372 for (jj=ii; jj<bs2; jj+=bs) { 373 bap[jj] = *value++; 374 } 375 } 376 } 377 } else { 378 if (is == ADD_VALUES) { 379 for (ii=0; ii<bs; ii++) { 380 for (jj=0; jj<bs; jj++) { 381 *bap++ += *value++; 382 } 383 } 384 } else { 385 for (ii=0; ii<bs; ii++) { 386 for (jj=0; jj<bs; jj++) { 387 *bap++ = *value++; 388 } 389 } 390 } 391 } 392 goto noinsert2; 393 } 394 } 395 if (nonew == 1) goto noinsert2; 396 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new block index nonzero block (%D, %D) in the matrix", orow, ocol); 397 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 398 N = nrow++ - 1; high++; 399 /* shift up all the later entries in this row */ 400 ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr); 401 ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr); 402 rp[i] = col; 403 bap = ap + bs2*i; 404 if (roworiented) { 405 for (ii=0; ii<bs; ii++) { 406 for (jj=ii; jj<bs2; jj+=bs) { 407 bap[jj] = *value++; 408 } 409 } 410 } else { 411 for (ii=0; ii<bs; ii++) { 412 for (jj=0; jj<bs; jj++) { 413 *bap++ = *value++; 414 } 415 } 416 } 417 noinsert2:; 418 ailen[row] = nrow; 419 PetscFunctionReturn(0); 420 } 421 422 /* 423 This routine is exactly duplicated in mpibaij.c 424 */ 425 PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol) 426 { 427 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 428 PetscInt *rp,low,high,t,ii,jj,nrow,i,rmax,N; 429 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 430 PetscErrorCode ierr; 431 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs; 432 PetscBool roworiented=a->roworiented; 433 const PetscScalar *value = v; 434 MatScalar *ap,*aa = a->a,*bap; 435 436 PetscFunctionBegin; 437 rp = aj + ai[row]; 438 ap = aa + bs2*ai[row]; 439 rmax = imax[row]; 440 nrow = ailen[row]; 441 low = 0; 442 high = nrow; 443 value = v; 444 while (high-low > 7) { 445 t = (low+high)/2; 446 if (rp[t] > col) high = t; 447 else low = t; 448 } 449 for (i=low; i<high; i++) { 450 if (rp[i] > col) break; 451 if (rp[i] == col) { 452 bap = ap + bs2*i; 453 if (roworiented) { 454 if (is == ADD_VALUES) { 455 for (ii=0; ii<bs; ii++) { 456 for (jj=ii; jj<bs2; jj+=bs) { 457 bap[jj] += *value++; 458 } 459 } 460 } else { 461 for (ii=0; ii<bs; ii++) { 462 for (jj=ii; jj<bs2; jj+=bs) { 463 bap[jj] = *value++; 464 } 465 } 466 } 467 } else { 468 if (is == ADD_VALUES) { 469 for (ii=0; ii<bs; ii++,value+=bs) { 470 for (jj=0; jj<bs; jj++) { 471 bap[jj] += value[jj]; 472 } 473 bap += bs; 474 } 475 } else { 476 for (ii=0; ii<bs; ii++,value+=bs) { 477 for (jj=0; jj<bs; jj++) { 478 bap[jj] = value[jj]; 479 } 480 bap += bs; 481 } 482 } 483 } 484 goto noinsert2; 485 } 486 } 487 if (nonew == 1) goto noinsert2; 488 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%D, %D) in the matrix", orow, ocol); 489 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 490 N = nrow++ - 1; high++; 491 /* shift up all the later entries in this row */ 492 ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr); 493 ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr); 494 rp[i] = col; 495 bap = ap + bs2*i; 496 if (roworiented) { 497 for (ii=0; ii<bs; ii++) { 498 for (jj=ii; jj<bs2; jj+=bs) { 499 bap[jj] = *value++; 500 } 501 } 502 } else { 503 for (ii=0; ii<bs; ii++) { 504 for (jj=0; jj<bs; jj++) { 505 *bap++ = *value++; 506 } 507 } 508 } 509 noinsert2:; 510 ailen[row] = nrow; 511 PetscFunctionReturn(0); 512 } 513 514 /* 515 This routine could be optimized by removing the need for the block copy below and passing stride information 516 to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ() 517 */ 518 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 519 { 520 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 521 const MatScalar *value; 522 MatScalar *barray =baij->barray; 523 PetscBool roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular; 524 PetscErrorCode ierr; 525 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 526 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 527 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 528 529 PetscFunctionBegin; 530 if (!barray) { 531 ierr = PetscMalloc1(bs2,&barray);CHKERRQ(ierr); 532 baij->barray = barray; 533 } 534 535 if (roworiented) { 536 stepval = (n-1)*bs; 537 } else { 538 stepval = (m-1)*bs; 539 } 540 for (i=0; i<m; i++) { 541 if (im[i] < 0) continue; 542 #if defined(PETSC_USE_DEBUG) 543 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %D max %D",im[i],baij->Mbs-1); 544 #endif 545 if (im[i] >= rstart && im[i] < rend) { 546 row = im[i] - rstart; 547 for (j=0; j<n; j++) { 548 if (im[i] > in[j]) { 549 if (ignore_ltriangular) continue; /* ignore lower triangular blocks */ 550 else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 551 } 552 /* If NumCol = 1 then a copy is not required */ 553 if ((roworiented) && (n == 1)) { 554 barray = (MatScalar*) v + i*bs2; 555 } else if ((!roworiented) && (m == 1)) { 556 barray = (MatScalar*) v + j*bs2; 557 } else { /* Here a copy is required */ 558 if (roworiented) { 559 value = v + i*(stepval+bs)*bs + j*bs; 560 } else { 561 value = v + j*(stepval+bs)*bs + i*bs; 562 } 563 for (ii=0; ii<bs; ii++,value+=stepval) { 564 for (jj=0; jj<bs; jj++) { 565 *barray++ = *value++; 566 } 567 } 568 barray -=bs2; 569 } 570 571 if (in[j] >= cstart && in[j] < cend) { 572 col = in[j] - cstart; 573 ierr = MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr); 574 } else if (in[j] < 0) continue; 575 #if defined(PETSC_USE_DEBUG) 576 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %D max %D",in[j],baij->Nbs-1); 577 #endif 578 else { 579 if (mat->was_assembled) { 580 if (!baij->colmap) { 581 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 582 } 583 584 #if defined(PETSC_USE_DEBUG) 585 #if defined(PETSC_USE_CTABLE) 586 { PetscInt data; 587 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 588 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 589 } 590 #else 591 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 592 #endif 593 #endif 594 #if defined(PETSC_USE_CTABLE) 595 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 596 col = (col - 1)/bs; 597 #else 598 col = (baij->colmap[in[j]] - 1)/bs; 599 #endif 600 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 601 ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 602 col = in[j]; 603 } 604 } else col = in[j]; 605 ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr); 606 } 607 } 608 } else { 609 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 610 if (!baij->donotstash) { 611 if (roworiented) { 612 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 613 } else { 614 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 615 } 616 } 617 } 618 } 619 PetscFunctionReturn(0); 620 } 621 622 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 623 { 624 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 625 PetscErrorCode ierr; 626 PetscInt bs = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend; 627 PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data; 628 629 PetscFunctionBegin; 630 for (i=0; i<m; i++) { 631 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */ 632 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 633 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 634 row = idxm[i] - bsrstart; 635 for (j=0; j<n; j++) { 636 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */ 637 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 638 if (idxn[j] >= bscstart && idxn[j] < bscend) { 639 col = idxn[j] - bscstart; 640 ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 641 } else { 642 if (!baij->colmap) { 643 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 644 } 645 #if defined(PETSC_USE_CTABLE) 646 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 647 data--; 648 #else 649 data = baij->colmap[idxn[j]/bs]-1; 650 #endif 651 if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 652 else { 653 col = data + idxn[j]%bs; 654 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 655 } 656 } 657 } 658 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 659 } 660 PetscFunctionReturn(0); 661 } 662 663 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm) 664 { 665 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 666 PetscErrorCode ierr; 667 PetscReal sum[2],*lnorm2; 668 669 PetscFunctionBegin; 670 if (baij->size == 1) { 671 ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr); 672 } else { 673 if (type == NORM_FROBENIUS) { 674 ierr = PetscMalloc1(2,&lnorm2);CHKERRQ(ierr); 675 ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr); 676 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++; /* squar power of norm(A) */ 677 ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr); 678 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--; /* squar power of norm(B) */ 679 ierr = MPIU_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 680 *norm = PetscSqrtReal(sum[0] + 2*sum[1]); 681 ierr = PetscFree(lnorm2);CHKERRQ(ierr); 682 } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */ 683 Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data; 684 Mat_SeqBAIJ *bmat=(Mat_SeqBAIJ*)baij->B->data; 685 PetscReal *rsum,*rsum2,vabs; 686 PetscInt *jj,*garray=baij->garray,rstart=baij->rstartbs,nz; 687 PetscInt brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs; 688 MatScalar *v; 689 690 ierr = PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2);CHKERRQ(ierr); 691 ierr = PetscArrayzero(rsum,mat->cmap->N);CHKERRQ(ierr); 692 /* Amat */ 693 v = amat->a; jj = amat->j; 694 for (brow=0; brow<mbs; brow++) { 695 grow = bs*(rstart + brow); 696 nz = amat->i[brow+1] - amat->i[brow]; 697 for (bcol=0; bcol<nz; bcol++) { 698 gcol = bs*(rstart + *jj); jj++; 699 for (col=0; col<bs; col++) { 700 for (row=0; row<bs; row++) { 701 vabs = PetscAbsScalar(*v); v++; 702 rsum[gcol+col] += vabs; 703 /* non-diagonal block */ 704 if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs; 705 } 706 } 707 } 708 ierr = PetscLogFlops(nz*bs*bs);CHKERRQ(ierr); 709 } 710 /* Bmat */ 711 v = bmat->a; jj = bmat->j; 712 for (brow=0; brow<mbs; brow++) { 713 grow = bs*(rstart + brow); 714 nz = bmat->i[brow+1] - bmat->i[brow]; 715 for (bcol=0; bcol<nz; bcol++) { 716 gcol = bs*garray[*jj]; jj++; 717 for (col=0; col<bs; col++) { 718 for (row=0; row<bs; row++) { 719 vabs = PetscAbsScalar(*v); v++; 720 rsum[gcol+col] += vabs; 721 rsum[grow+row] += vabs; 722 } 723 } 724 } 725 ierr = PetscLogFlops(nz*bs*bs);CHKERRQ(ierr); 726 } 727 ierr = MPIU_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 728 *norm = 0.0; 729 for (col=0; col<mat->cmap->N; col++) { 730 if (rsum2[col] > *norm) *norm = rsum2[col]; 731 } 732 ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr); 733 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet"); 734 } 735 PetscFunctionReturn(0); 736 } 737 738 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode) 739 { 740 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 741 PetscErrorCode ierr; 742 PetscInt nstash,reallocs; 743 744 PetscFunctionBegin; 745 if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 746 747 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 748 ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr); 749 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 750 ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 751 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 752 ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 753 PetscFunctionReturn(0); 754 } 755 756 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode) 757 { 758 Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data; 759 Mat_SeqSBAIJ *a =(Mat_SeqSBAIJ*)baij->A->data; 760 PetscErrorCode ierr; 761 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 762 PetscInt *row,*col; 763 PetscBool other_disassembled; 764 PetscMPIInt n; 765 PetscBool r1,r2,r3; 766 MatScalar *val; 767 768 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 769 PetscFunctionBegin; 770 if (!baij->donotstash && !mat->nooffprocentries) { 771 while (1) { 772 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 773 if (!flg) break; 774 775 for (i=0; i<n;) { 776 /* Now identify the consecutive vals belonging to the same row */ 777 for (j=i,rstart=row[j]; j<n; j++) { 778 if (row[j] != rstart) break; 779 } 780 if (j < n) ncols = j-i; 781 else ncols = n-i; 782 /* Now assemble all these values with a single function call */ 783 ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 784 i = j; 785 } 786 } 787 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 788 /* Now process the block-stash. Since the values are stashed column-oriented, 789 set the roworiented flag to column oriented, and after MatSetValues() 790 restore the original flags */ 791 r1 = baij->roworiented; 792 r2 = a->roworiented; 793 r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented; 794 795 baij->roworiented = PETSC_FALSE; 796 a->roworiented = PETSC_FALSE; 797 798 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworinted */ 799 while (1) { 800 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 801 if (!flg) break; 802 803 for (i=0; i<n;) { 804 /* Now identify the consecutive vals belonging to the same row */ 805 for (j=i,rstart=row[j]; j<n; j++) { 806 if (row[j] != rstart) break; 807 } 808 if (j < n) ncols = j-i; 809 else ncols = n-i; 810 ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode);CHKERRQ(ierr); 811 i = j; 812 } 813 } 814 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 815 816 baij->roworiented = r1; 817 a->roworiented = r2; 818 819 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworinted */ 820 } 821 822 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 823 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 824 825 /* determine if any processor has disassembled, if so we must 826 also disassemble ourselfs, in order that we may reassemble. */ 827 /* 828 if nonzero structure of submatrix B cannot change then we know that 829 no processor disassembled thus we can skip this stuff 830 */ 831 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 832 ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 833 if (mat->was_assembled && !other_disassembled) { 834 ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 835 } 836 } 837 838 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 839 ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */ 840 } 841 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 842 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 843 844 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 845 846 baij->rowvalues = 0; 847 848 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 849 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 850 PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate; 851 ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 852 } 853 PetscFunctionReturn(0); 854 } 855 856 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 857 #include <petscdraw.h> 858 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 859 { 860 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 861 PetscErrorCode ierr; 862 PetscInt bs = mat->rmap->bs; 863 PetscMPIInt rank = baij->rank; 864 PetscBool iascii,isdraw; 865 PetscViewer sviewer; 866 PetscViewerFormat format; 867 868 PetscFunctionBegin; 869 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 870 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 871 if (iascii) { 872 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 873 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 874 MatInfo info; 875 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 876 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 877 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 878 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %g\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(double)info.memory);CHKERRQ(ierr); 879 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 880 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 881 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 882 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 883 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 884 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 885 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 886 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 887 PetscFunctionReturn(0); 888 } else if (format == PETSC_VIEWER_ASCII_INFO) { 889 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 890 PetscFunctionReturn(0); 891 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 892 PetscFunctionReturn(0); 893 } 894 } 895 896 if (isdraw) { 897 PetscDraw draw; 898 PetscBool isnull; 899 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 900 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 901 if (isnull) PetscFunctionReturn(0); 902 } 903 904 { 905 /* assemble the entire matrix onto first processor. */ 906 Mat A; 907 Mat_SeqSBAIJ *Aloc; 908 Mat_SeqBAIJ *Bloc; 909 PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 910 MatScalar *a; 911 const char *matname; 912 913 /* Should this be the same type as mat? */ 914 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 915 if (!rank) { 916 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 917 } else { 918 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 919 } 920 ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 921 ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr); 922 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 923 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 924 925 /* copy over the A part */ 926 Aloc = (Mat_SeqSBAIJ*)baij->A->data; 927 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 928 ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr); 929 930 for (i=0; i<mbs; i++) { 931 rvals[0] = bs*(baij->rstartbs + i); 932 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 933 for (j=ai[i]; j<ai[i+1]; j++) { 934 col = (baij->cstartbs+aj[j])*bs; 935 for (k=0; k<bs; k++) { 936 ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 937 col++; 938 a += bs; 939 } 940 } 941 } 942 /* copy over the B part */ 943 Bloc = (Mat_SeqBAIJ*)baij->B->data; 944 ai = Bloc->i; aj = Bloc->j; a = Bloc->a; 945 for (i=0; i<mbs; i++) { 946 947 rvals[0] = bs*(baij->rstartbs + i); 948 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 949 for (j=ai[i]; j<ai[i+1]; j++) { 950 col = baij->garray[aj[j]]*bs; 951 for (k=0; k<bs; k++) { 952 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 953 col++; 954 a += bs; 955 } 956 } 957 } 958 ierr = PetscFree(rvals);CHKERRQ(ierr); 959 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 960 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 961 /* 962 Everyone has to call to draw the matrix since the graphics waits are 963 synchronized across all processors that share the PetscDraw object 964 */ 965 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 966 ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr); 967 if (!rank) { 968 ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,matname);CHKERRQ(ierr); 969 ierr = MatView_SeqSBAIJ(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 970 } 971 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 972 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 973 ierr = MatDestroy(&A);CHKERRQ(ierr); 974 } 975 PetscFunctionReturn(0); 976 } 977 978 static PetscErrorCode MatView_MPISBAIJ_Binary(Mat mat,PetscViewer viewer) 979 { 980 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)mat->data; 981 Mat_SeqSBAIJ *A = (Mat_SeqSBAIJ*)a->A->data; 982 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)a->B->data; 983 PetscErrorCode ierr; 984 PetscInt i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen; 985 PetscInt *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll; 986 int fd; 987 PetscScalar *column_values; 988 FILE *file; 989 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 990 PetscInt message_count,flowcontrolcount; 991 992 PetscFunctionBegin; 993 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 994 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 995 nz = bs2*(A->nz + B->nz); 996 rlen = mat->rmap->n; 997 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 998 if (!rank) { 999 header[0] = MAT_FILE_CLASSID; 1000 header[1] = mat->rmap->N; 1001 header[2] = mat->cmap->N; 1002 1003 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1004 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1005 /* get largest number of rows any processor has */ 1006 range = mat->rmap->range; 1007 for (i=1; i<size; i++) { 1008 rlen = PetscMax(rlen,range[i+1] - range[i]); 1009 } 1010 } else { 1011 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1012 } 1013 1014 ierr = PetscMalloc1(rlen/bs,&crow_lens);CHKERRQ(ierr); 1015 /* compute lengths of each row */ 1016 for (i=0; i<a->mbs; i++) { 1017 crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1018 } 1019 /* store the row lengths to the file */ 1020 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1021 if (!rank) { 1022 MPI_Status status; 1023 ierr = PetscMalloc1(rlen,&row_lens);CHKERRQ(ierr); 1024 rlen = (range[1] - range[0])/bs; 1025 for (i=0; i<rlen; i++) { 1026 for (j=0; j<bs; j++) { 1027 row_lens[i*bs+j] = bs*crow_lens[i]; 1028 } 1029 } 1030 ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1031 for (i=1; i<size; i++) { 1032 rlen = (range[i+1] - range[i])/bs; 1033 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1034 ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1035 for (k=0; k<rlen; k++) { 1036 for (j=0; j<bs; j++) { 1037 row_lens[k*bs+j] = bs*crow_lens[k]; 1038 } 1039 } 1040 ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1041 } 1042 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1043 ierr = PetscFree(row_lens);CHKERRQ(ierr); 1044 } else { 1045 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1046 ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1047 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1048 } 1049 ierr = PetscFree(crow_lens);CHKERRQ(ierr); 1050 1051 /* load up the local column indices. Include for all rows not just one for each block row since process 0 does not have the 1052 information needed to make it for each row from a block row. This does require more communication but still not more than 1053 the communication needed for the nonzero values */ 1054 nzmax = nz; /* space a largest processor needs */ 1055 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1056 ierr = PetscMalloc1(nzmax,&column_indices);CHKERRQ(ierr); 1057 cnt = 0; 1058 for (i=0; i<a->mbs; i++) { 1059 pcnt = cnt; 1060 for (j=B->i[i]; j<B->i[i+1]; j++) { 1061 if ((col = garray[B->j[j]]) > cstart) break; 1062 for (l=0; l<bs; l++) { 1063 column_indices[cnt++] = bs*col+l; 1064 } 1065 } 1066 for (k=A->i[i]; k<A->i[i+1]; k++) { 1067 for (l=0; l<bs; l++) { 1068 column_indices[cnt++] = bs*(A->j[k] + cstart)+l; 1069 } 1070 } 1071 for (; j<B->i[i+1]; j++) { 1072 for (l=0; l<bs; l++) { 1073 column_indices[cnt++] = bs*garray[B->j[j]]+l; 1074 } 1075 } 1076 len = cnt - pcnt; 1077 for (k=1; k<bs; k++) { 1078 ierr = PetscArraycpy(&column_indices[cnt],&column_indices[pcnt],len);CHKERRQ(ierr); 1079 cnt += len; 1080 } 1081 } 1082 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1083 1084 /* store the columns to the file */ 1085 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1086 if (!rank) { 1087 MPI_Status status; 1088 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1089 for (i=1; i<size; i++) { 1090 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1091 ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1092 ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1093 ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1094 } 1095 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1096 } else { 1097 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1098 ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1099 ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1100 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1101 } 1102 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1103 1104 /* load up the numerical values */ 1105 ierr = PetscMalloc1(nzmax,&column_values);CHKERRQ(ierr); 1106 cnt = 0; 1107 for (i=0; i<a->mbs; i++) { 1108 rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]); 1109 for (j=B->i[i]; j<B->i[i+1]; j++) { 1110 if (garray[B->j[j]] > cstart) break; 1111 for (l=0; l<bs; l++) { 1112 for (ll=0; ll<bs; ll++) { 1113 column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll]; 1114 } 1115 } 1116 cnt += bs; 1117 } 1118 for (k=A->i[i]; k<A->i[i+1]; k++) { 1119 for (l=0; l<bs; l++) { 1120 for (ll=0; ll<bs; ll++) { 1121 column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll]; 1122 } 1123 } 1124 cnt += bs; 1125 } 1126 for (; j<B->i[i+1]; j++) { 1127 for (l=0; l<bs; l++) { 1128 for (ll=0; ll<bs; ll++) { 1129 column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll]; 1130 } 1131 } 1132 cnt += bs; 1133 } 1134 cnt += (bs-1)*rlen; 1135 } 1136 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1137 1138 /* store the column values to the file */ 1139 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1140 if (!rank) { 1141 MPI_Status status; 1142 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1143 for (i=1; i<size; i++) { 1144 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1145 ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1146 ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1147 ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1148 } 1149 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1150 } else { 1151 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1152 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1153 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1154 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1155 } 1156 ierr = PetscFree(column_values);CHKERRQ(ierr); 1157 1158 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1159 if (file) { 1160 fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs); 1161 } 1162 PetscFunctionReturn(0); 1163 } 1164 1165 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer) 1166 { 1167 PetscErrorCode ierr; 1168 PetscBool iascii,isdraw,issocket,isbinary; 1169 1170 PetscFunctionBegin; 1171 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1172 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1173 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1174 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1175 if (iascii || isdraw || issocket) { 1176 ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1177 } else if (isbinary) { 1178 ierr = MatView_MPISBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1179 } 1180 PetscFunctionReturn(0); 1181 } 1182 1183 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat) 1184 { 1185 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1186 PetscErrorCode ierr; 1187 1188 PetscFunctionBegin; 1189 #if defined(PETSC_USE_LOG) 1190 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N); 1191 #endif 1192 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1193 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 1194 ierr = MatDestroy(&baij->A);CHKERRQ(ierr); 1195 ierr = MatDestroy(&baij->B);CHKERRQ(ierr); 1196 #if defined(PETSC_USE_CTABLE) 1197 ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr); 1198 #else 1199 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 1200 #endif 1201 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 1202 ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr); 1203 ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr); 1204 ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr); 1205 ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr); 1206 ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr); 1207 ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr); 1208 ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr); 1209 ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr); 1210 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 1211 ierr = PetscFree(baij->barray);CHKERRQ(ierr); 1212 ierr = PetscFree(baij->hd);CHKERRQ(ierr); 1213 ierr = VecDestroy(&baij->diag);CHKERRQ(ierr); 1214 ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr); 1215 ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr); 1216 #if defined(PETSC_USE_REAL_MAT_SINGLE) 1217 ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr); 1218 #endif 1219 ierr = PetscFree(baij->in_loc);CHKERRQ(ierr); 1220 ierr = PetscFree(baij->v_loc);CHKERRQ(ierr); 1221 ierr = PetscFree(baij->rangebs);CHKERRQ(ierr); 1222 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1223 1224 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1225 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1226 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1227 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1228 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1229 #if defined(PETSC_HAVE_ELEMENTAL) 1230 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_elemental_C",NULL);CHKERRQ(ierr); 1231 #endif 1232 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpiaij_C",NULL);CHKERRQ(ierr); 1233 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpibaij_C",NULL);CHKERRQ(ierr); 1234 PetscFunctionReturn(0); 1235 } 1236 1237 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy) 1238 { 1239 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1240 PetscErrorCode ierr; 1241 PetscInt nt,mbs=a->mbs,bs=A->rmap->bs; 1242 PetscScalar *from; 1243 const PetscScalar *x; 1244 1245 PetscFunctionBegin; 1246 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1247 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1248 1249 /* diagonal part */ 1250 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 1251 ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr); 1252 1253 /* subdiagonal part */ 1254 ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 1255 1256 /* copy x into the vec slvec0 */ 1257 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 1258 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1259 1260 ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr); 1261 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 1262 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1263 1264 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1265 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1266 /* supperdiagonal part */ 1267 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 1268 PetscFunctionReturn(0); 1269 } 1270 1271 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy) 1272 { 1273 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1274 PetscErrorCode ierr; 1275 PetscInt nt,mbs=a->mbs,bs=A->rmap->bs; 1276 PetscScalar *from; 1277 const PetscScalar *x; 1278 1279 PetscFunctionBegin; 1280 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1281 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1282 1283 /* diagonal part */ 1284 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 1285 ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr); 1286 1287 /* subdiagonal part */ 1288 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 1289 1290 /* copy x into the vec slvec0 */ 1291 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 1292 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1293 1294 ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr); 1295 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 1296 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1297 1298 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1299 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1300 /* supperdiagonal part */ 1301 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 1302 PetscFunctionReturn(0); 1303 } 1304 1305 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy) 1306 { 1307 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1308 PetscErrorCode ierr; 1309 PetscInt nt; 1310 1311 PetscFunctionBegin; 1312 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1313 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1314 1315 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1316 if (nt != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 1317 1318 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1319 /* do diagonal part */ 1320 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1321 /* do supperdiagonal part */ 1322 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1323 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1324 /* do subdiagonal part */ 1325 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1326 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1327 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1328 PetscFunctionReturn(0); 1329 } 1330 1331 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1332 { 1333 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1334 PetscErrorCode ierr; 1335 PetscInt mbs=a->mbs,bs=A->rmap->bs; 1336 PetscScalar *from,zero=0.0; 1337 const PetscScalar *x; 1338 1339 PetscFunctionBegin; 1340 /* 1341 PetscSynchronizedPrintf(PetscObjectComm((PetscObject)A)," MatMultAdd is called ...\n"); 1342 PetscSynchronizedFlush(PetscObjectComm((PetscObject)A),PETSC_STDOUT); 1343 */ 1344 /* diagonal part */ 1345 ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr); 1346 ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr); 1347 1348 /* subdiagonal part */ 1349 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 1350 1351 /* copy x into the vec slvec0 */ 1352 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 1353 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1354 ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr); 1355 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 1356 1357 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1358 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1359 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1360 1361 /* supperdiagonal part */ 1362 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr); 1363 PetscFunctionReturn(0); 1364 } 1365 1366 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz) 1367 { 1368 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1369 PetscErrorCode ierr; 1370 1371 PetscFunctionBegin; 1372 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1373 /* do diagonal part */ 1374 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1375 /* do supperdiagonal part */ 1376 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1377 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1378 1379 /* do subdiagonal part */ 1380 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1381 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1382 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1383 PetscFunctionReturn(0); 1384 } 1385 1386 /* 1387 This only works correctly for square matrices where the subblock A->A is the 1388 diagonal block 1389 */ 1390 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v) 1391 { 1392 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1393 PetscErrorCode ierr; 1394 1395 PetscFunctionBegin; 1396 /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */ 1397 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1398 PetscFunctionReturn(0); 1399 } 1400 1401 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa) 1402 { 1403 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1404 PetscErrorCode ierr; 1405 1406 PetscFunctionBegin; 1407 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1408 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1409 PetscFunctionReturn(0); 1410 } 1411 1412 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1413 { 1414 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 1415 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1416 PetscErrorCode ierr; 1417 PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1418 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend; 1419 PetscInt *cmap,*idx_p,cstart = mat->rstartbs; 1420 1421 PetscFunctionBegin; 1422 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1423 mat->getrowactive = PETSC_TRUE; 1424 1425 if (!mat->rowvalues && (idx || v)) { 1426 /* 1427 allocate enough space to hold information from the longest row. 1428 */ 1429 Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data; 1430 Mat_SeqBAIJ *Ba = (Mat_SeqBAIJ*)mat->B->data; 1431 PetscInt max = 1,mbs = mat->mbs,tmp; 1432 for (i=0; i<mbs; i++) { 1433 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */ 1434 if (max < tmp) max = tmp; 1435 } 1436 ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr); 1437 } 1438 1439 if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows"); 1440 lrow = row - brstart; /* local row index */ 1441 1442 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1443 if (!v) {pvA = 0; pvB = 0;} 1444 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1445 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1446 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1447 nztot = nzA + nzB; 1448 1449 cmap = mat->garray; 1450 if (v || idx) { 1451 if (nztot) { 1452 /* Sort by increasing column numbers, assuming A and B already sorted */ 1453 PetscInt imark = -1; 1454 if (v) { 1455 *v = v_p = mat->rowvalues; 1456 for (i=0; i<nzB; i++) { 1457 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1458 else break; 1459 } 1460 imark = i; 1461 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1462 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1463 } 1464 if (idx) { 1465 *idx = idx_p = mat->rowindices; 1466 if (imark > -1) { 1467 for (i=0; i<imark; i++) { 1468 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1469 } 1470 } else { 1471 for (i=0; i<nzB; i++) { 1472 if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1473 else break; 1474 } 1475 imark = i; 1476 } 1477 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1478 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1479 } 1480 } else { 1481 if (idx) *idx = 0; 1482 if (v) *v = 0; 1483 } 1484 } 1485 *nz = nztot; 1486 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1487 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1488 PetscFunctionReturn(0); 1489 } 1490 1491 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1492 { 1493 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1494 1495 PetscFunctionBegin; 1496 if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1497 baij->getrowactive = PETSC_FALSE; 1498 PetscFunctionReturn(0); 1499 } 1500 1501 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A) 1502 { 1503 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1504 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1505 1506 PetscFunctionBegin; 1507 aA->getrow_utriangular = PETSC_TRUE; 1508 PetscFunctionReturn(0); 1509 } 1510 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A) 1511 { 1512 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1513 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1514 1515 PetscFunctionBegin; 1516 aA->getrow_utriangular = PETSC_FALSE; 1517 PetscFunctionReturn(0); 1518 } 1519 1520 PetscErrorCode MatRealPart_MPISBAIJ(Mat A) 1521 { 1522 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1523 PetscErrorCode ierr; 1524 1525 PetscFunctionBegin; 1526 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1527 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1528 PetscFunctionReturn(0); 1529 } 1530 1531 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A) 1532 { 1533 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1534 PetscErrorCode ierr; 1535 1536 PetscFunctionBegin; 1537 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1538 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1539 PetscFunctionReturn(0); 1540 } 1541 1542 /* Check if isrow is a subset of iscol_local, called by MatCreateSubMatrix_MPISBAIJ() 1543 Input: isrow - distributed(parallel), 1544 iscol_local - locally owned (seq) 1545 */ 1546 PetscErrorCode ISEqual_private(IS isrow,IS iscol_local,PetscBool *flg) 1547 { 1548 PetscErrorCode ierr; 1549 PetscInt sz1,sz2,*a1,*a2,i,j,k,nmatch; 1550 const PetscInt *ptr1,*ptr2; 1551 1552 PetscFunctionBegin; 1553 ierr = ISGetLocalSize(isrow,&sz1);CHKERRQ(ierr); 1554 ierr = ISGetLocalSize(iscol_local,&sz2);CHKERRQ(ierr); 1555 if (sz1 > sz2) { 1556 *flg = PETSC_FALSE; 1557 PetscFunctionReturn(0); 1558 } 1559 1560 ierr = ISGetIndices(isrow,&ptr1);CHKERRQ(ierr); 1561 ierr = ISGetIndices(iscol_local,&ptr2);CHKERRQ(ierr); 1562 1563 ierr = PetscMalloc1(sz1,&a1);CHKERRQ(ierr); 1564 ierr = PetscMalloc1(sz2,&a2);CHKERRQ(ierr); 1565 ierr = PetscArraycpy(a1,ptr1,sz1);CHKERRQ(ierr); 1566 ierr = PetscArraycpy(a2,ptr2,sz2);CHKERRQ(ierr); 1567 ierr = PetscSortInt(sz1,a1);CHKERRQ(ierr); 1568 ierr = PetscSortInt(sz2,a2);CHKERRQ(ierr); 1569 1570 nmatch=0; 1571 k = 0; 1572 for (i=0; i<sz1; i++){ 1573 for (j=k; j<sz2; j++){ 1574 if (a1[i] == a2[j]) { 1575 k = j; nmatch++; 1576 break; 1577 } 1578 } 1579 } 1580 ierr = ISRestoreIndices(isrow,&ptr1);CHKERRQ(ierr); 1581 ierr = ISRestoreIndices(iscol_local,&ptr2);CHKERRQ(ierr); 1582 ierr = PetscFree(a1);CHKERRQ(ierr); 1583 ierr = PetscFree(a2);CHKERRQ(ierr); 1584 if (nmatch < sz1) { 1585 *flg = PETSC_FALSE; 1586 } else { 1587 *flg = PETSC_TRUE; 1588 } 1589 PetscFunctionReturn(0); 1590 } 1591 1592 PetscErrorCode MatCreateSubMatrix_MPISBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 1593 { 1594 PetscErrorCode ierr; 1595 IS iscol_local; 1596 PetscInt csize; 1597 PetscBool isequal; 1598 1599 PetscFunctionBegin; 1600 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1601 if (call == MAT_REUSE_MATRIX) { 1602 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 1603 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 1604 } else { 1605 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 1606 ierr = ISEqual_private(isrow,iscol_local,&isequal);CHKERRQ(ierr); 1607 if (!isequal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isrow"); 1608 } 1609 1610 /* now call MatCreateSubMatrix_MPIBAIJ() */ 1611 ierr = MatCreateSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 1612 if (call == MAT_INITIAL_MATRIX) { 1613 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 1614 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 1615 } 1616 PetscFunctionReturn(0); 1617 } 1618 1619 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A) 1620 { 1621 Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data; 1622 PetscErrorCode ierr; 1623 1624 PetscFunctionBegin; 1625 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1626 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1627 PetscFunctionReturn(0); 1628 } 1629 1630 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1631 { 1632 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data; 1633 Mat A = a->A,B = a->B; 1634 PetscErrorCode ierr; 1635 PetscLogDouble isend[5],irecv[5]; 1636 1637 PetscFunctionBegin; 1638 info->block_size = (PetscReal)matin->rmap->bs; 1639 1640 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1641 1642 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1643 isend[3] = info->memory; isend[4] = info->mallocs; 1644 1645 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1646 1647 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1648 isend[3] += info->memory; isend[4] += info->mallocs; 1649 if (flag == MAT_LOCAL) { 1650 info->nz_used = isend[0]; 1651 info->nz_allocated = isend[1]; 1652 info->nz_unneeded = isend[2]; 1653 info->memory = isend[3]; 1654 info->mallocs = isend[4]; 1655 } else if (flag == MAT_GLOBAL_MAX) { 1656 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1657 1658 info->nz_used = irecv[0]; 1659 info->nz_allocated = irecv[1]; 1660 info->nz_unneeded = irecv[2]; 1661 info->memory = irecv[3]; 1662 info->mallocs = irecv[4]; 1663 } else if (flag == MAT_GLOBAL_SUM) { 1664 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1665 1666 info->nz_used = irecv[0]; 1667 info->nz_allocated = irecv[1]; 1668 info->nz_unneeded = irecv[2]; 1669 info->memory = irecv[3]; 1670 info->mallocs = irecv[4]; 1671 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1672 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1673 info->fill_ratio_needed = 0; 1674 info->factor_mallocs = 0; 1675 PetscFunctionReturn(0); 1676 } 1677 1678 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg) 1679 { 1680 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1681 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1682 PetscErrorCode ierr; 1683 1684 PetscFunctionBegin; 1685 switch (op) { 1686 case MAT_NEW_NONZERO_LOCATIONS: 1687 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1688 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1689 case MAT_KEEP_NONZERO_PATTERN: 1690 case MAT_SUBMAT_SINGLEIS: 1691 case MAT_NEW_NONZERO_LOCATION_ERR: 1692 MatCheckPreallocated(A,1); 1693 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1694 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1695 break; 1696 case MAT_ROW_ORIENTED: 1697 MatCheckPreallocated(A,1); 1698 a->roworiented = flg; 1699 1700 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1701 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1702 break; 1703 case MAT_NEW_DIAGONALS: 1704 case MAT_SORTED_FULL: 1705 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1706 break; 1707 case MAT_IGNORE_OFF_PROC_ENTRIES: 1708 a->donotstash = flg; 1709 break; 1710 case MAT_USE_HASH_TABLE: 1711 a->ht_flag = flg; 1712 break; 1713 case MAT_HERMITIAN: 1714 MatCheckPreallocated(A,1); 1715 if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first"); 1716 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1717 #if defined(PETSC_USE_COMPLEX) 1718 A->ops->mult = MatMult_MPISBAIJ_Hermitian; 1719 #endif 1720 break; 1721 case MAT_SPD: 1722 A->spd_set = PETSC_TRUE; 1723 A->spd = flg; 1724 if (flg) { 1725 A->symmetric = PETSC_TRUE; 1726 A->structurally_symmetric = PETSC_TRUE; 1727 A->symmetric_set = PETSC_TRUE; 1728 A->structurally_symmetric_set = PETSC_TRUE; 1729 } 1730 break; 1731 case MAT_SYMMETRIC: 1732 MatCheckPreallocated(A,1); 1733 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1734 break; 1735 case MAT_STRUCTURALLY_SYMMETRIC: 1736 MatCheckPreallocated(A,1); 1737 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1738 break; 1739 case MAT_SYMMETRY_ETERNAL: 1740 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric"); 1741 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1742 break; 1743 case MAT_IGNORE_LOWER_TRIANGULAR: 1744 aA->ignore_ltriangular = flg; 1745 break; 1746 case MAT_ERROR_LOWER_TRIANGULAR: 1747 aA->ignore_ltriangular = flg; 1748 break; 1749 case MAT_GETROW_UPPERTRIANGULAR: 1750 aA->getrow_utriangular = flg; 1751 break; 1752 default: 1753 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1754 } 1755 PetscFunctionReturn(0); 1756 } 1757 1758 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B) 1759 { 1760 PetscErrorCode ierr; 1761 1762 PetscFunctionBegin; 1763 if (reuse == MAT_INITIAL_MATRIX) { 1764 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 1765 } else if (reuse == MAT_REUSE_MATRIX) { 1766 ierr = MatCopy(A,*B,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1767 } 1768 PetscFunctionReturn(0); 1769 } 1770 1771 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr) 1772 { 1773 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1774 Mat a = baij->A, b=baij->B; 1775 PetscErrorCode ierr; 1776 PetscInt nv,m,n; 1777 PetscBool flg; 1778 1779 PetscFunctionBegin; 1780 if (ll != rr) { 1781 ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr); 1782 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n"); 1783 } 1784 if (!ll) PetscFunctionReturn(0); 1785 1786 ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr); 1787 if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n); 1788 1789 ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr); 1790 if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size"); 1791 1792 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1793 1794 /* left diagonalscale the off-diagonal part */ 1795 ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr); 1796 1797 /* scale the diagonal part */ 1798 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1799 1800 /* right diagonalscale the off-diagonal part */ 1801 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1802 ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr); 1803 PetscFunctionReturn(0); 1804 } 1805 1806 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A) 1807 { 1808 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1809 PetscErrorCode ierr; 1810 1811 PetscFunctionBegin; 1812 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1813 PetscFunctionReturn(0); 1814 } 1815 1816 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*); 1817 1818 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool *flag) 1819 { 1820 Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data; 1821 Mat a,b,c,d; 1822 PetscBool flg; 1823 PetscErrorCode ierr; 1824 1825 PetscFunctionBegin; 1826 a = matA->A; b = matA->B; 1827 c = matB->A; d = matB->B; 1828 1829 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1830 if (flg) { 1831 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1832 } 1833 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1834 PetscFunctionReturn(0); 1835 } 1836 1837 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str) 1838 { 1839 PetscErrorCode ierr; 1840 PetscBool isbaij; 1841 1842 PetscFunctionBegin; 1843 ierr = PetscObjectTypeCompareAny((PetscObject)B,&isbaij,MATSEQSBAIJ,MATMPISBAIJ,"");CHKERRQ(ierr); 1844 if (!isbaij) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"Not for matrix type %s",((PetscObject)B)->type_name); 1845 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1846 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1847 ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr); 1848 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1849 ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr); 1850 } else { 1851 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1852 Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)B->data; 1853 1854 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1855 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1856 } 1857 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 1858 PetscFunctionReturn(0); 1859 } 1860 1861 PetscErrorCode MatSetUp_MPISBAIJ(Mat A) 1862 { 1863 PetscErrorCode ierr; 1864 1865 PetscFunctionBegin; 1866 ierr = MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1867 PetscFunctionReturn(0); 1868 } 1869 1870 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1871 { 1872 PetscErrorCode ierr; 1873 Mat_MPISBAIJ *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data; 1874 PetscBLASInt bnz,one=1; 1875 Mat_SeqSBAIJ *xa,*ya; 1876 Mat_SeqBAIJ *xb,*yb; 1877 1878 PetscFunctionBegin; 1879 if (str == SAME_NONZERO_PATTERN) { 1880 PetscScalar alpha = a; 1881 xa = (Mat_SeqSBAIJ*)xx->A->data; 1882 ya = (Mat_SeqSBAIJ*)yy->A->data; 1883 ierr = PetscBLASIntCast(xa->nz,&bnz);CHKERRQ(ierr); 1884 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one)); 1885 xb = (Mat_SeqBAIJ*)xx->B->data; 1886 yb = (Mat_SeqBAIJ*)yy->B->data; 1887 ierr = PetscBLASIntCast(xb->nz,&bnz);CHKERRQ(ierr); 1888 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one)); 1889 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 1890 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1891 ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 1892 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1893 ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr); 1894 } else { 1895 Mat B; 1896 PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs; 1897 if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size"); 1898 ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr); 1899 ierr = MatGetRowUpperTriangular(Y);CHKERRQ(ierr); 1900 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 1901 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 1902 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 1903 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 1904 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 1905 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 1906 ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr); 1907 ierr = MatAXPYGetPreallocation_SeqSBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 1908 ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 1909 ierr = MatMPISBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 1910 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 1911 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 1912 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 1913 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 1914 ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr); 1915 ierr = MatRestoreRowUpperTriangular(Y);CHKERRQ(ierr); 1916 } 1917 PetscFunctionReturn(0); 1918 } 1919 1920 PetscErrorCode MatCreateSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[]) 1921 { 1922 PetscErrorCode ierr; 1923 PetscInt i; 1924 PetscBool flg; 1925 1926 PetscFunctionBegin; 1927 ierr = MatCreateSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr); /* B[] are sbaij matrices */ 1928 for (i=0; i<n; i++) { 1929 ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr); 1930 if (!flg) { 1931 ierr = MatSeqSBAIJZeroOps_Private(*B[i]);CHKERRQ(ierr); 1932 } 1933 } 1934 PetscFunctionReturn(0); 1935 } 1936 1937 PetscErrorCode MatShift_MPISBAIJ(Mat Y,PetscScalar a) 1938 { 1939 PetscErrorCode ierr; 1940 Mat_MPISBAIJ *maij = (Mat_MPISBAIJ*)Y->data; 1941 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ*)maij->A->data; 1942 1943 PetscFunctionBegin; 1944 if (!Y->preallocated) { 1945 ierr = MatMPISBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);CHKERRQ(ierr); 1946 } else if (!aij->nz) { 1947 PetscInt nonew = aij->nonew; 1948 ierr = MatSeqSBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL);CHKERRQ(ierr); 1949 aij->nonew = nonew; 1950 } 1951 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 1952 PetscFunctionReturn(0); 1953 } 1954 1955 PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A,PetscBool *missing,PetscInt *d) 1956 { 1957 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1958 PetscErrorCode ierr; 1959 1960 PetscFunctionBegin; 1961 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 1962 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 1963 if (d) { 1964 PetscInt rstart; 1965 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 1966 *d += rstart/A->rmap->bs; 1967 1968 } 1969 PetscFunctionReturn(0); 1970 } 1971 1972 PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a) 1973 { 1974 PetscFunctionBegin; 1975 *a = ((Mat_MPISBAIJ*)A->data)->A; 1976 PetscFunctionReturn(0); 1977 } 1978 1979 /* -------------------------------------------------------------------*/ 1980 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ, 1981 MatGetRow_MPISBAIJ, 1982 MatRestoreRow_MPISBAIJ, 1983 MatMult_MPISBAIJ, 1984 /* 4*/ MatMultAdd_MPISBAIJ, 1985 MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */ 1986 MatMultAdd_MPISBAIJ, 1987 0, 1988 0, 1989 0, 1990 /* 10*/ 0, 1991 0, 1992 0, 1993 MatSOR_MPISBAIJ, 1994 MatTranspose_MPISBAIJ, 1995 /* 15*/ MatGetInfo_MPISBAIJ, 1996 MatEqual_MPISBAIJ, 1997 MatGetDiagonal_MPISBAIJ, 1998 MatDiagonalScale_MPISBAIJ, 1999 MatNorm_MPISBAIJ, 2000 /* 20*/ MatAssemblyBegin_MPISBAIJ, 2001 MatAssemblyEnd_MPISBAIJ, 2002 MatSetOption_MPISBAIJ, 2003 MatZeroEntries_MPISBAIJ, 2004 /* 24*/ 0, 2005 0, 2006 0, 2007 0, 2008 0, 2009 /* 29*/ MatSetUp_MPISBAIJ, 2010 0, 2011 0, 2012 MatGetDiagonalBlock_MPISBAIJ, 2013 0, 2014 /* 34*/ MatDuplicate_MPISBAIJ, 2015 0, 2016 0, 2017 0, 2018 0, 2019 /* 39*/ MatAXPY_MPISBAIJ, 2020 MatCreateSubMatrices_MPISBAIJ, 2021 MatIncreaseOverlap_MPISBAIJ, 2022 MatGetValues_MPISBAIJ, 2023 MatCopy_MPISBAIJ, 2024 /* 44*/ 0, 2025 MatScale_MPISBAIJ, 2026 MatShift_MPISBAIJ, 2027 0, 2028 0, 2029 /* 49*/ 0, 2030 0, 2031 0, 2032 0, 2033 0, 2034 /* 54*/ 0, 2035 0, 2036 MatSetUnfactored_MPISBAIJ, 2037 0, 2038 MatSetValuesBlocked_MPISBAIJ, 2039 /* 59*/ MatCreateSubMatrix_MPISBAIJ, 2040 0, 2041 0, 2042 0, 2043 0, 2044 /* 64*/ 0, 2045 0, 2046 0, 2047 0, 2048 0, 2049 /* 69*/ MatGetRowMaxAbs_MPISBAIJ, 2050 0, 2051 MatConvert_MPISBAIJ_Basic, 2052 0, 2053 0, 2054 /* 74*/ 0, 2055 0, 2056 0, 2057 0, 2058 0, 2059 /* 79*/ 0, 2060 0, 2061 0, 2062 0, 2063 MatLoad_MPISBAIJ, 2064 /* 84*/ 0, 2065 0, 2066 0, 2067 0, 2068 0, 2069 /* 89*/ 0, 2070 0, 2071 0, 2072 0, 2073 0, 2074 /* 94*/ 0, 2075 0, 2076 0, 2077 0, 2078 0, 2079 /* 99*/ 0, 2080 0, 2081 0, 2082 0, 2083 0, 2084 /*104*/ 0, 2085 MatRealPart_MPISBAIJ, 2086 MatImaginaryPart_MPISBAIJ, 2087 MatGetRowUpperTriangular_MPISBAIJ, 2088 MatRestoreRowUpperTriangular_MPISBAIJ, 2089 /*109*/ 0, 2090 0, 2091 0, 2092 0, 2093 MatMissingDiagonal_MPISBAIJ, 2094 /*114*/ 0, 2095 0, 2096 0, 2097 0, 2098 0, 2099 /*119*/ 0, 2100 0, 2101 0, 2102 0, 2103 0, 2104 /*124*/ 0, 2105 0, 2106 0, 2107 0, 2108 0, 2109 /*129*/ 0, 2110 0, 2111 0, 2112 0, 2113 0, 2114 /*134*/ 0, 2115 0, 2116 0, 2117 0, 2118 0, 2119 /*139*/ MatSetBlockSizes_Default, 2120 0, 2121 0, 2122 0, 2123 0, 2124 /*144*/MatCreateMPIMatConcatenateSeqMat_MPISBAIJ 2125 }; 2126 2127 PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz) 2128 { 2129 Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)B->data; 2130 PetscErrorCode ierr; 2131 PetscInt i,mbs,Mbs; 2132 PetscMPIInt size; 2133 2134 PetscFunctionBegin; 2135 ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 2136 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2137 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2138 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2139 if (B->rmap->N > B->cmap->N) SETERRQ2(PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"MPISBAIJ matrix cannot have more rows %D than columns %D",B->rmap->N,B->cmap->N); 2140 if (B->rmap->n > B->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"MPISBAIJ matrix cannot have more local rows %D than columns %D",B->rmap->n,B->cmap->n); 2141 2142 mbs = B->rmap->n/bs; 2143 Mbs = B->rmap->N/bs; 2144 if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->N,bs); 2145 2146 B->rmap->bs = bs; 2147 b->bs2 = bs*bs; 2148 b->mbs = mbs; 2149 b->Mbs = Mbs; 2150 b->nbs = B->cmap->n/bs; 2151 b->Nbs = B->cmap->N/bs; 2152 2153 for (i=0; i<=b->size; i++) { 2154 b->rangebs[i] = B->rmap->range[i]/bs; 2155 } 2156 b->rstartbs = B->rmap->rstart/bs; 2157 b->rendbs = B->rmap->rend/bs; 2158 2159 b->cstartbs = B->cmap->rstart/bs; 2160 b->cendbs = B->cmap->rend/bs; 2161 2162 #if defined(PETSC_USE_CTABLE) 2163 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2164 #else 2165 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2166 #endif 2167 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2168 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2169 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2170 ierr = VecDestroy(&b->slvec0);CHKERRQ(ierr); 2171 ierr = VecDestroy(&b->slvec0b);CHKERRQ(ierr); 2172 ierr = VecDestroy(&b->slvec1);CHKERRQ(ierr); 2173 ierr = VecDestroy(&b->slvec1a);CHKERRQ(ierr); 2174 ierr = VecDestroy(&b->slvec1b);CHKERRQ(ierr); 2175 ierr = VecScatterDestroy(&b->sMvctx);CHKERRQ(ierr); 2176 2177 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2178 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 2179 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2180 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2181 ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr); 2182 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 2183 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2184 2185 if (!B->preallocated) { 2186 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2187 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2188 ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr); 2189 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2190 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr); 2191 } 2192 2193 ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2194 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 2195 2196 B->preallocated = PETSC_TRUE; 2197 B->was_assembled = PETSC_FALSE; 2198 B->assembled = PETSC_FALSE; 2199 PetscFunctionReturn(0); 2200 } 2201 2202 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2203 { 2204 PetscInt m,rstart,cend; 2205 PetscInt i,j,d,nz,bd, nz_max=0,*d_nnz=0,*o_nnz=0; 2206 const PetscInt *JJ =0; 2207 PetscScalar *values=0; 2208 PetscBool roworiented = ((Mat_MPISBAIJ*)B->data)->roworiented; 2209 PetscErrorCode ierr; 2210 PetscBool nooffprocentries; 2211 2212 PetscFunctionBegin; 2213 if (bs < 1) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 2214 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2215 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2216 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2217 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2218 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2219 m = B->rmap->n/bs; 2220 rstart = B->rmap->rstart/bs; 2221 cend = B->cmap->rend/bs; 2222 2223 if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 2224 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 2225 for (i=0; i<m; i++) { 2226 nz = ii[i+1] - ii[i]; 2227 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 2228 /* count the ones on the diagonal and above, split into diagonal and off diagonal portions. */ 2229 JJ = jj + ii[i]; 2230 bd = 0; 2231 for (j=0; j<nz; j++) { 2232 if (*JJ >= i + rstart) break; 2233 JJ++; 2234 bd++; 2235 } 2236 d = 0; 2237 for (; j<nz; j++) { 2238 if (*JJ++ >= cend) break; 2239 d++; 2240 } 2241 d_nnz[i] = d; 2242 o_nnz[i] = nz - d - bd; 2243 nz = nz - bd; 2244 nz_max = PetscMax(nz_max,nz); 2245 } 2246 ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2247 ierr = MatSetOption(B,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 2248 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 2249 2250 values = (PetscScalar*)V; 2251 if (!values) { 2252 ierr = PetscCalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr); 2253 } 2254 for (i=0; i<m; i++) { 2255 PetscInt row = i + rstart; 2256 PetscInt ncols = ii[i+1] - ii[i]; 2257 const PetscInt *icols = jj + ii[i]; 2258 if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */ 2259 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2260 ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2261 } else { /* block ordering does not match so we can only insert one block at a time. */ 2262 PetscInt j; 2263 for (j=0; j<ncols; j++) { 2264 const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0); 2265 ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr); 2266 } 2267 } 2268 } 2269 2270 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2271 nooffprocentries = B->nooffprocentries; 2272 B->nooffprocentries = PETSC_TRUE; 2273 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2274 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2275 B->nooffprocentries = nooffprocentries; 2276 2277 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2278 PetscFunctionReturn(0); 2279 } 2280 2281 /*MC 2282 MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices, 2283 based on block compressed sparse row format. Only the upper triangular portion of the "diagonal" portion of 2284 the matrix is stored. 2285 2286 For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you 2287 can call MatSetOption(Mat, MAT_HERMITIAN); 2288 2289 Options Database Keys: 2290 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions() 2291 2292 Notes: 2293 The number of rows in the matrix must be less than or equal to the number of columns. Similarly the number of rows in the 2294 diagonal portion of the matrix of each process has to less than or equal the number of columns. 2295 2296 Level: beginner 2297 2298 .seealso: MatCreateMPISBAIJ(), MATSEQSBAIJ, MatType 2299 M*/ 2300 2301 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B) 2302 { 2303 Mat_MPISBAIJ *b; 2304 PetscErrorCode ierr; 2305 PetscBool flg = PETSC_FALSE; 2306 2307 PetscFunctionBegin; 2308 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 2309 B->data = (void*)b; 2310 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2311 2312 B->ops->destroy = MatDestroy_MPISBAIJ; 2313 B->ops->view = MatView_MPISBAIJ; 2314 B->assembled = PETSC_FALSE; 2315 B->insertmode = NOT_SET_VALUES; 2316 2317 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 2318 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr); 2319 2320 /* build local table of row and column ownerships */ 2321 ierr = PetscMalloc1(b->size+2,&b->rangebs);CHKERRQ(ierr); 2322 2323 /* build cache for off array entries formed */ 2324 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 2325 2326 b->donotstash = PETSC_FALSE; 2327 b->colmap = NULL; 2328 b->garray = NULL; 2329 b->roworiented = PETSC_TRUE; 2330 2331 /* stuff used in block assembly */ 2332 b->barray = 0; 2333 2334 /* stuff used for matrix vector multiply */ 2335 b->lvec = 0; 2336 b->Mvctx = 0; 2337 b->slvec0 = 0; 2338 b->slvec0b = 0; 2339 b->slvec1 = 0; 2340 b->slvec1a = 0; 2341 b->slvec1b = 0; 2342 b->sMvctx = 0; 2343 2344 /* stuff for MatGetRow() */ 2345 b->rowindices = 0; 2346 b->rowvalues = 0; 2347 b->getrowactive = PETSC_FALSE; 2348 2349 /* hash table stuff */ 2350 b->ht = 0; 2351 b->hd = 0; 2352 b->ht_size = 0; 2353 b->ht_flag = PETSC_FALSE; 2354 b->ht_fact = 0; 2355 b->ht_total_ct = 0; 2356 b->ht_insert_ct = 0; 2357 2358 /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */ 2359 b->ijonly = PETSC_FALSE; 2360 2361 b->in_loc = 0; 2362 b->v_loc = 0; 2363 b->n_loc = 0; 2364 2365 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ);CHKERRQ(ierr); 2366 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr); 2367 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr); 2368 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr); 2369 #if defined(PETSC_HAVE_ELEMENTAL) 2370 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_elemental_C",MatConvert_MPISBAIJ_Elemental);CHKERRQ(ierr); 2371 #endif 2372 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpiaij_C",MatConvert_MPISBAIJ_Basic);CHKERRQ(ierr); 2373 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpibaij_C",MatConvert_MPISBAIJ_Basic);CHKERRQ(ierr); 2374 2375 B->symmetric = PETSC_TRUE; 2376 B->structurally_symmetric = PETSC_TRUE; 2377 B->symmetric_set = PETSC_TRUE; 2378 B->structurally_symmetric_set = PETSC_TRUE; 2379 B->symmetric_eternal = PETSC_TRUE; 2380 2381 B->hermitian = PETSC_FALSE; 2382 B->hermitian_set = PETSC_FALSE; 2383 2384 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr); 2385 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr); 2386 ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",flg,&flg,NULL);CHKERRQ(ierr); 2387 if (flg) { 2388 PetscReal fact = 1.39; 2389 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 2390 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr); 2391 if (fact <= 1.0) fact = 1.39; 2392 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 2393 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 2394 } 2395 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2396 PetscFunctionReturn(0); 2397 } 2398 2399 /*MC 2400 MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices. 2401 2402 This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator, 2403 and MATMPISBAIJ otherwise. 2404 2405 Options Database Keys: 2406 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions() 2407 2408 Level: beginner 2409 2410 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ 2411 M*/ 2412 2413 /*@C 2414 MatMPISBAIJSetPreallocation - For good matrix assembly performance 2415 the user should preallocate the matrix storage by setting the parameters 2416 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2417 performance can be increased by more than a factor of 50. 2418 2419 Collective on Mat 2420 2421 Input Parameters: 2422 + B - the matrix 2423 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 2424 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 2425 . d_nz - number of block nonzeros per block row in diagonal portion of local 2426 submatrix (same for all local rows) 2427 . d_nnz - array containing the number of block nonzeros in the various block rows 2428 in the upper triangular and diagonal part of the in diagonal portion of the local 2429 (possibly different for each block row) or NULL. If you plan to factor the matrix you must leave room 2430 for the diagonal entry and set a value even if it is zero. 2431 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2432 submatrix (same for all local rows). 2433 - o_nnz - array containing the number of nonzeros in the various block rows of the 2434 off-diagonal portion of the local submatrix that is right of the diagonal 2435 (possibly different for each block row) or NULL. 2436 2437 2438 Options Database Keys: 2439 + -mat_no_unroll - uses code that does not unroll the loops in the 2440 block calculations (much slower) 2441 - -mat_block_size - size of the blocks to use 2442 2443 Notes: 2444 2445 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2446 than it must be used on all processors that share the object for that argument. 2447 2448 If the *_nnz parameter is given then the *_nz parameter is ignored 2449 2450 Storage Information: 2451 For a square global matrix we define each processor's diagonal portion 2452 to be its local rows and the corresponding columns (a square submatrix); 2453 each processor's off-diagonal portion encompasses the remainder of the 2454 local matrix (a rectangular submatrix). 2455 2456 The user can specify preallocated storage for the diagonal part of 2457 the local submatrix with either d_nz or d_nnz (not both). Set 2458 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 2459 memory allocation. Likewise, specify preallocated storage for the 2460 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2461 2462 You can call MatGetInfo() to get information on how effective the preallocation was; 2463 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 2464 You can also run with the option -info and look for messages with the string 2465 malloc in them to see if additional memory allocation was needed. 2466 2467 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2468 the figure below we depict these three local rows and all columns (0-11). 2469 2470 .vb 2471 0 1 2 3 4 5 6 7 8 9 10 11 2472 -------------------------- 2473 row 3 |. . . d d d o o o o o o 2474 row 4 |. . . d d d o o o o o o 2475 row 5 |. . . d d d o o o o o o 2476 -------------------------- 2477 .ve 2478 2479 Thus, any entries in the d locations are stored in the d (diagonal) 2480 submatrix, and any entries in the o locations are stored in the 2481 o (off-diagonal) submatrix. Note that the d matrix is stored in 2482 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 2483 2484 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 2485 plus the diagonal part of the d matrix, 2486 and o_nz should indicate the number of block nonzeros per row in the o matrix 2487 2488 In general, for PDE problems in which most nonzeros are near the diagonal, 2489 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2490 or you will get TERRIBLE performance; see the users' manual chapter on 2491 matrices. 2492 2493 Level: intermediate 2494 2495 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership() 2496 @*/ 2497 PetscErrorCode MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2498 { 2499 PetscErrorCode ierr; 2500 2501 PetscFunctionBegin; 2502 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2503 PetscValidType(B,1); 2504 PetscValidLogicalCollectiveInt(B,bs,2); 2505 ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 2506 PetscFunctionReturn(0); 2507 } 2508 2509 /*@C 2510 MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format 2511 (block compressed row). For good matrix assembly performance 2512 the user should preallocate the matrix storage by setting the parameters 2513 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2514 performance can be increased by more than a factor of 50. 2515 2516 Collective 2517 2518 Input Parameters: 2519 + comm - MPI communicator 2520 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 2521 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 2522 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2523 This value should be the same as the local size used in creating the 2524 y vector for the matrix-vector product y = Ax. 2525 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 2526 This value should be the same as the local size used in creating the 2527 x vector for the matrix-vector product y = Ax. 2528 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2529 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2530 . d_nz - number of block nonzeros per block row in diagonal portion of local 2531 submatrix (same for all local rows) 2532 . d_nnz - array containing the number of block nonzeros in the various block rows 2533 in the upper triangular portion of the in diagonal portion of the local 2534 (possibly different for each block block row) or NULL. 2535 If you plan to factor the matrix you must leave room for the diagonal entry and 2536 set its value even if it is zero. 2537 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2538 submatrix (same for all local rows). 2539 - o_nnz - array containing the number of nonzeros in the various block rows of the 2540 off-diagonal portion of the local submatrix (possibly different for 2541 each block row) or NULL. 2542 2543 Output Parameter: 2544 . A - the matrix 2545 2546 Options Database Keys: 2547 + -mat_no_unroll - uses code that does not unroll the loops in the 2548 block calculations (much slower) 2549 . -mat_block_size - size of the blocks to use 2550 - -mat_mpi - use the parallel matrix data structures even on one processor 2551 (defaults to using SeqBAIJ format on one processor) 2552 2553 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 2554 MatXXXXSetPreallocation() paradigm instead of this routine directly. 2555 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 2556 2557 Notes: 2558 The number of rows and columns must be divisible by blocksize. 2559 This matrix type does not support complex Hermitian operation. 2560 2561 The user MUST specify either the local or global matrix dimensions 2562 (possibly both). 2563 2564 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2565 than it must be used on all processors that share the object for that argument. 2566 2567 If the *_nnz parameter is given then the *_nz parameter is ignored 2568 2569 Storage Information: 2570 For a square global matrix we define each processor's diagonal portion 2571 to be its local rows and the corresponding columns (a square submatrix); 2572 each processor's off-diagonal portion encompasses the remainder of the 2573 local matrix (a rectangular submatrix). 2574 2575 The user can specify preallocated storage for the diagonal part of 2576 the local submatrix with either d_nz or d_nnz (not both). Set 2577 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 2578 memory allocation. Likewise, specify preallocated storage for the 2579 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2580 2581 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2582 the figure below we depict these three local rows and all columns (0-11). 2583 2584 .vb 2585 0 1 2 3 4 5 6 7 8 9 10 11 2586 -------------------------- 2587 row 3 |. . . d d d o o o o o o 2588 row 4 |. . . d d d o o o o o o 2589 row 5 |. . . d d d o o o o o o 2590 -------------------------- 2591 .ve 2592 2593 Thus, any entries in the d locations are stored in the d (diagonal) 2594 submatrix, and any entries in the o locations are stored in the 2595 o (off-diagonal) submatrix. Note that the d matrix is stored in 2596 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 2597 2598 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 2599 plus the diagonal part of the d matrix, 2600 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2601 In general, for PDE problems in which most nonzeros are near the diagonal, 2602 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2603 or you will get TERRIBLE performance; see the users' manual chapter on 2604 matrices. 2605 2606 Level: intermediate 2607 2608 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ() 2609 @*/ 2610 2611 PetscErrorCode MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2612 { 2613 PetscErrorCode ierr; 2614 PetscMPIInt size; 2615 2616 PetscFunctionBegin; 2617 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2618 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2619 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2620 if (size > 1) { 2621 ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr); 2622 ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2623 } else { 2624 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 2625 ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2626 } 2627 PetscFunctionReturn(0); 2628 } 2629 2630 2631 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2632 { 2633 Mat mat; 2634 Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data; 2635 PetscErrorCode ierr; 2636 PetscInt len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs; 2637 PetscScalar *array; 2638 2639 PetscFunctionBegin; 2640 *newmat = 0; 2641 2642 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2643 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2644 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2645 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2646 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2647 2648 mat->factortype = matin->factortype; 2649 mat->preallocated = PETSC_TRUE; 2650 mat->assembled = PETSC_TRUE; 2651 mat->insertmode = NOT_SET_VALUES; 2652 2653 a = (Mat_MPISBAIJ*)mat->data; 2654 a->bs2 = oldmat->bs2; 2655 a->mbs = oldmat->mbs; 2656 a->nbs = oldmat->nbs; 2657 a->Mbs = oldmat->Mbs; 2658 a->Nbs = oldmat->Nbs; 2659 2660 a->size = oldmat->size; 2661 a->rank = oldmat->rank; 2662 a->donotstash = oldmat->donotstash; 2663 a->roworiented = oldmat->roworiented; 2664 a->rowindices = 0; 2665 a->rowvalues = 0; 2666 a->getrowactive = PETSC_FALSE; 2667 a->barray = 0; 2668 a->rstartbs = oldmat->rstartbs; 2669 a->rendbs = oldmat->rendbs; 2670 a->cstartbs = oldmat->cstartbs; 2671 a->cendbs = oldmat->cendbs; 2672 2673 /* hash table stuff */ 2674 a->ht = 0; 2675 a->hd = 0; 2676 a->ht_size = 0; 2677 a->ht_flag = oldmat->ht_flag; 2678 a->ht_fact = oldmat->ht_fact; 2679 a->ht_total_ct = 0; 2680 a->ht_insert_ct = 0; 2681 2682 ierr = PetscArraycpy(a->rangebs,oldmat->rangebs,a->size+2);CHKERRQ(ierr); 2683 if (oldmat->colmap) { 2684 #if defined(PETSC_USE_CTABLE) 2685 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2686 #else 2687 ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr); 2688 ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2689 ierr = PetscArraycpy(a->colmap,oldmat->colmap,a->Nbs);CHKERRQ(ierr); 2690 #endif 2691 } else a->colmap = 0; 2692 2693 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2694 ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr); 2695 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2696 ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); 2697 } else a->garray = 0; 2698 2699 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 2700 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2701 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2702 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2703 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2704 2705 ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr); 2706 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr); 2707 ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr); 2708 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr); 2709 2710 ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr); 2711 ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr); 2712 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr); 2713 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr); 2714 ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr); 2715 ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr); 2716 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr); 2717 ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr); 2718 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr); 2719 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr); 2720 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b);CHKERRQ(ierr); 2721 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a);CHKERRQ(ierr); 2722 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b);CHKERRQ(ierr); 2723 2724 /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */ 2725 ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr); 2726 a->sMvctx = oldmat->sMvctx; 2727 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx);CHKERRQ(ierr); 2728 2729 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2730 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2731 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2732 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2733 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2734 *newmat = mat; 2735 PetscFunctionReturn(0); 2736 } 2737 2738 PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer) 2739 { 2740 PetscErrorCode ierr; 2741 PetscInt i,nz,j,rstart,rend; 2742 PetscScalar *vals,*buf; 2743 MPI_Comm comm; 2744 MPI_Status status; 2745 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,mmbs; 2746 PetscInt header[4],*rowlengths = 0,M,N,m,*cols,*locrowlens; 2747 PetscInt *procsnz = 0,jj,*mycols,*ibuf; 2748 PetscInt bs = newmat->rmap->bs,Mbs,mbs,extra_rows; 2749 PetscInt *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2750 PetscInt dcount,kmax,k,nzcount,tmp; 2751 int fd; 2752 PetscBool isbinary; 2753 2754 PetscFunctionBegin; 2755 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 2756 if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)newmat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newmat)->type_name); 2757 2758 /* force binary viewer to load .info file if it has not yet done so */ 2759 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2760 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 2761 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr); 2762 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 2763 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2764 if (bs < 0) bs = 1; 2765 2766 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2767 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2768 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2769 if (!rank) { 2770 ierr = PetscBinaryRead(fd,(char*)header,4,NULL,PETSC_INT);CHKERRQ(ierr); 2771 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2772 if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newmat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ"); 2773 } 2774 2775 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2776 M = header[1]; 2777 N = header[2]; 2778 2779 /* If global sizes are set, check if they are consistent with that given in the file */ 2780 if (newmat->rmap->N >= 0 && newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newmat->rmap->N,M); 2781 if (newmat->cmap->N >= 0 && newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newmat->cmap->N,N); 2782 2783 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 2784 2785 /* 2786 This code adds extra rows to make sure the number of rows is 2787 divisible by the blocksize 2788 */ 2789 Mbs = M/bs; 2790 extra_rows = bs - M + bs*(Mbs); 2791 if (extra_rows == bs) extra_rows = 0; 2792 else Mbs++; 2793 if (extra_rows &&!rank) { 2794 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 2795 } 2796 2797 /* determine ownership of all rows */ 2798 if (newmat->rmap->n < 0) { /* PETSC_DECIDE */ 2799 mbs = Mbs/size + ((Mbs % size) > rank); 2800 m = mbs*bs; 2801 } else { /* User Set */ 2802 m = newmat->rmap->n; 2803 mbs = m/bs; 2804 } 2805 ierr = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr); 2806 ierr = PetscMPIIntCast(mbs,&mmbs);CHKERRQ(ierr); 2807 ierr = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2808 rowners[0] = 0; 2809 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2810 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2811 rstart = rowners[rank]; 2812 rend = rowners[rank+1]; 2813 2814 /* distribute row lengths to all processors */ 2815 ierr = PetscMalloc1((rend-rstart)*bs,&locrowlens);CHKERRQ(ierr); 2816 if (!rank) { 2817 ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr); 2818 ierr = PetscBinaryRead(fd,rowlengths,M,NULL,PETSC_INT);CHKERRQ(ierr); 2819 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2820 ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr); 2821 for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2822 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2823 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2824 } else { 2825 ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2826 } 2827 2828 if (!rank) { /* procs[0] */ 2829 /* calculate the number of nonzeros on each processor */ 2830 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 2831 for (i=0; i<size; i++) { 2832 for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 2833 procsnz[i] += rowlengths[j]; 2834 } 2835 } 2836 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2837 2838 /* determine max buffer needed and allocate it */ 2839 maxnz = 0; 2840 for (i=0; i<size; i++) { 2841 maxnz = PetscMax(maxnz,procsnz[i]); 2842 } 2843 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 2844 2845 /* read in my part of the matrix column indices */ 2846 nz = procsnz[0]; 2847 ierr = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr); 2848 mycols = ibuf; 2849 if (size == 1) nz -= extra_rows; 2850 ierr = PetscBinaryRead(fd,mycols,nz,NULL,PETSC_INT);CHKERRQ(ierr); 2851 if (size == 1) { 2852 for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i; 2853 } 2854 2855 /* read in every ones (except the last) and ship off */ 2856 for (i=1; i<size-1; i++) { 2857 nz = procsnz[i]; 2858 ierr = PetscBinaryRead(fd,cols,nz,NULL,PETSC_INT);CHKERRQ(ierr); 2859 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2860 } 2861 /* read in the stuff for the last proc */ 2862 if (size != 1) { 2863 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2864 ierr = PetscBinaryRead(fd,cols,nz,NULL,PETSC_INT);CHKERRQ(ierr); 2865 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2866 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 2867 } 2868 ierr = PetscFree(cols);CHKERRQ(ierr); 2869 } else { /* procs[i], i>0 */ 2870 /* determine buffer space needed for message */ 2871 nz = 0; 2872 for (i=0; i<m; i++) nz += locrowlens[i]; 2873 ierr = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr); 2874 mycols = ibuf; 2875 /* receive message of column indices*/ 2876 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2877 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2878 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2879 } 2880 2881 /* loop over local rows, determining number of off diagonal entries */ 2882 ierr = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr); 2883 ierr = PetscCalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr); 2884 rowcount = 0; 2885 nzcount = 0; 2886 for (i=0; i<mbs; i++) { 2887 dcount = 0; 2888 odcount = 0; 2889 for (j=0; j<bs; j++) { 2890 kmax = locrowlens[rowcount]; 2891 for (k=0; k<kmax; k++) { 2892 tmp = mycols[nzcount++]/bs; /* block col. index */ 2893 if (!mask[tmp]) { 2894 mask[tmp] = 1; 2895 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */ 2896 else masked1[dcount++] = tmp; /* entry in diag portion */ 2897 } 2898 } 2899 rowcount++; 2900 } 2901 2902 dlens[i] = dcount; /* d_nzz[i] */ 2903 odlens[i] = odcount; /* o_nzz[i] */ 2904 2905 /* zero out the mask elements we set */ 2906 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2907 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2908 } 2909 ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 2910 ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr); 2911 ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 2912 2913 if (!rank) { 2914 ierr = PetscMalloc1(maxnz,&buf);CHKERRQ(ierr); 2915 /* read in my part of the matrix numerical values */ 2916 nz = procsnz[0]; 2917 vals = buf; 2918 mycols = ibuf; 2919 if (size == 1) nz -= extra_rows; 2920 ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr); 2921 if (size == 1) { 2922 for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0; 2923 } 2924 2925 /* insert into matrix */ 2926 jj = rstart*bs; 2927 for (i=0; i<m; i++) { 2928 ierr = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2929 mycols += locrowlens[i]; 2930 vals += locrowlens[i]; 2931 jj++; 2932 } 2933 2934 /* read in other processors (except the last one) and ship out */ 2935 for (i=1; i<size-1; i++) { 2936 nz = procsnz[i]; 2937 vals = buf; 2938 ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr); 2939 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 2940 } 2941 /* the last proc */ 2942 if (size != 1) { 2943 nz = procsnz[i] - extra_rows; 2944 vals = buf; 2945 ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr); 2946 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2947 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 2948 } 2949 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2950 2951 } else { 2952 /* receive numeric values */ 2953 ierr = PetscMalloc1(nz,&buf);CHKERRQ(ierr); 2954 2955 /* receive message of values*/ 2956 vals = buf; 2957 mycols = ibuf; 2958 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr); 2959 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2960 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2961 2962 /* insert into matrix */ 2963 jj = rstart*bs; 2964 for (i=0; i<m; i++) { 2965 ierr = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2966 mycols += locrowlens[i]; 2967 vals += locrowlens[i]; 2968 jj++; 2969 } 2970 } 2971 2972 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2973 ierr = PetscFree(buf);CHKERRQ(ierr); 2974 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2975 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 2976 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 2977 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 2978 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2979 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2980 PetscFunctionReturn(0); 2981 } 2982 2983 /*XXXXX@ 2984 MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2985 2986 Input Parameters: 2987 . mat - the matrix 2988 . fact - factor 2989 2990 Not Collective on Mat, each process can have a different hash factor 2991 2992 Level: advanced 2993 2994 Notes: 2995 This can also be set by the command line option: -mat_use_hash_table fact 2996 2997 .seealso: MatSetOption() 2998 @XXXXX*/ 2999 3000 3001 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[]) 3002 { 3003 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 3004 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(a->B)->data; 3005 PetscReal atmp; 3006 PetscReal *work,*svalues,*rvalues; 3007 PetscErrorCode ierr; 3008 PetscInt i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol; 3009 PetscMPIInt rank,size; 3010 PetscInt *rowners_bs,dest,count,source; 3011 PetscScalar *va; 3012 MatScalar *ba; 3013 MPI_Status stat; 3014 3015 PetscFunctionBegin; 3016 if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov"); 3017 ierr = MatGetRowMaxAbs(a->A,v,NULL);CHKERRQ(ierr); 3018 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 3019 3020 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 3021 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 3022 3023 bs = A->rmap->bs; 3024 mbs = a->mbs; 3025 Mbs = a->Mbs; 3026 ba = b->a; 3027 bi = b->i; 3028 bj = b->j; 3029 3030 /* find ownerships */ 3031 rowners_bs = A->rmap->range; 3032 3033 /* each proc creates an array to be distributed */ 3034 ierr = PetscCalloc1(bs*Mbs,&work);CHKERRQ(ierr); 3035 3036 /* row_max for B */ 3037 if (rank != size-1) { 3038 for (i=0; i<mbs; i++) { 3039 ncols = bi[1] - bi[0]; bi++; 3040 brow = bs*i; 3041 for (j=0; j<ncols; j++) { 3042 bcol = bs*(*bj); 3043 for (kcol=0; kcol<bs; kcol++) { 3044 col = bcol + kcol; /* local col index */ 3045 col += rowners_bs[rank+1]; /* global col index */ 3046 for (krow=0; krow<bs; krow++) { 3047 atmp = PetscAbsScalar(*ba); ba++; 3048 row = brow + krow; /* local row index */ 3049 if (PetscRealPart(va[row]) < atmp) va[row] = atmp; 3050 if (work[col] < atmp) work[col] = atmp; 3051 } 3052 } 3053 bj++; 3054 } 3055 } 3056 3057 /* send values to its owners */ 3058 for (dest=rank+1; dest<size; dest++) { 3059 svalues = work + rowners_bs[dest]; 3060 count = rowners_bs[dest+1]-rowners_bs[dest]; 3061 ierr = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 3062 } 3063 } 3064 3065 /* receive values */ 3066 if (rank) { 3067 rvalues = work; 3068 count = rowners_bs[rank+1]-rowners_bs[rank]; 3069 for (source=0; source<rank; source++) { 3070 ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat);CHKERRQ(ierr); 3071 /* process values */ 3072 for (i=0; i<count; i++) { 3073 if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i]; 3074 } 3075 } 3076 } 3077 3078 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 3079 ierr = PetscFree(work);CHKERRQ(ierr); 3080 PetscFunctionReturn(0); 3081 } 3082 3083 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 3084 { 3085 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 3086 PetscErrorCode ierr; 3087 PetscInt mbs=mat->mbs,bs=matin->rmap->bs; 3088 PetscScalar *x,*ptr,*from; 3089 Vec bb1; 3090 const PetscScalar *b; 3091 3092 PetscFunctionBegin; 3093 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 3094 if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 3095 3096 if (flag == SOR_APPLY_UPPER) { 3097 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 3098 PetscFunctionReturn(0); 3099 } 3100 3101 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 3102 if (flag & SOR_ZERO_INITIAL_GUESS) { 3103 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 3104 its--; 3105 } 3106 3107 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 3108 while (its--) { 3109 3110 /* lower triangular part: slvec0b = - B^T*xx */ 3111 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 3112 3113 /* copy xx into slvec0a */ 3114 ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr); 3115 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 3116 ierr = PetscArraycpy(ptr,x,bs*mbs);CHKERRQ(ierr); 3117 ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr); 3118 3119 ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr); 3120 3121 /* copy bb into slvec1a */ 3122 ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr); 3123 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 3124 ierr = PetscArraycpy(ptr,b,bs*mbs);CHKERRQ(ierr); 3125 ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr); 3126 3127 /* set slvec1b = 0 */ 3128 ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr); 3129 3130 ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3131 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 3132 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 3133 ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3134 3135 /* upper triangular part: bb1 = bb1 - B*x */ 3136 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr); 3137 3138 /* local diagonal sweep */ 3139 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 3140 } 3141 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 3142 } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 3143 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 3144 } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 3145 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 3146 } else if (flag & SOR_EISENSTAT) { 3147 Vec xx1; 3148 PetscBool hasop; 3149 const PetscScalar *diag; 3150 PetscScalar *sl,scale = (omega - 2.0)/omega; 3151 PetscInt i,n; 3152 3153 if (!mat->xx1) { 3154 ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr); 3155 ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr); 3156 } 3157 xx1 = mat->xx1; 3158 bb1 = mat->bb1; 3159 3160 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 3161 3162 if (!mat->diag) { 3163 /* this is wrong for same matrix with new nonzero values */ 3164 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 3165 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 3166 } 3167 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 3168 3169 if (hasop) { 3170 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 3171 ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr); 3172 } else { 3173 /* 3174 These two lines are replaced by code that may be a bit faster for a good compiler 3175 ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr); 3176 ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr); 3177 */ 3178 ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr); 3179 ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr); 3180 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 3181 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 3182 ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr); 3183 if (omega == 1.0) { 3184 for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i]; 3185 ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr); 3186 } else { 3187 for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i]; 3188 ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr); 3189 } 3190 ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr); 3191 ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr); 3192 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 3193 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 3194 } 3195 3196 /* multiply off-diagonal portion of matrix */ 3197 ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr); 3198 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 3199 ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr); 3200 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 3201 ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr); 3202 ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr); 3203 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 3204 ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3205 ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3206 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr); 3207 3208 /* local sweep */ 3209 ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 3210 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 3211 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 3212 PetscFunctionReturn(0); 3213 } 3214 3215 PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 3216 { 3217 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 3218 PetscErrorCode ierr; 3219 Vec lvec1,bb1; 3220 3221 PetscFunctionBegin; 3222 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 3223 if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 3224 3225 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 3226 if (flag & SOR_ZERO_INITIAL_GUESS) { 3227 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 3228 its--; 3229 } 3230 3231 ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr); 3232 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 3233 while (its--) { 3234 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3235 3236 /* lower diagonal part: bb1 = bb - B^T*xx */ 3237 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr); 3238 ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr); 3239 3240 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3241 ierr = VecCopy(bb,bb1);CHKERRQ(ierr); 3242 ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3243 3244 /* upper diagonal part: bb1 = bb1 - B*x */ 3245 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 3246 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 3247 3248 ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3249 3250 /* diagonal sweep */ 3251 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 3252 } 3253 ierr = VecDestroy(&lvec1);CHKERRQ(ierr); 3254 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 3255 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 3256 PetscFunctionReturn(0); 3257 } 3258 3259 /*@ 3260 MatCreateMPISBAIJWithArrays - creates a MPI SBAIJ matrix using arrays that contain in standard 3261 CSR format the local rows. 3262 3263 Collective 3264 3265 Input Parameters: 3266 + comm - MPI communicator 3267 . bs - the block size, only a block size of 1 is supported 3268 . m - number of local rows (Cannot be PETSC_DECIDE) 3269 . n - This value should be the same as the local size used in creating the 3270 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3271 calculated if N is given) For square matrices n is almost always m. 3272 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3273 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3274 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that row block row of the matrix 3275 . j - column indices 3276 - a - matrix values 3277 3278 Output Parameter: 3279 . mat - the matrix 3280 3281 Level: intermediate 3282 3283 Notes: 3284 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3285 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3286 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3287 3288 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3289 3290 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3291 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 3292 @*/ 3293 PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3294 { 3295 PetscErrorCode ierr; 3296 3297 3298 PetscFunctionBegin; 3299 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3300 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3301 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3302 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3303 ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr); 3304 ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr); 3305 PetscFunctionReturn(0); 3306 } 3307 3308 3309 /*@C 3310 MatMPISBAIJSetPreallocationCSR - Creates a sparse parallel matrix in SBAIJ format using the given nonzero structure and (optional) numerical values 3311 3312 Collective 3313 3314 Input Parameters: 3315 + B - the matrix 3316 . bs - the block size 3317 . i - the indices into j for the start of each local row (starts with zero) 3318 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3319 - v - optional values in the matrix 3320 3321 Level: advanced 3322 3323 Notes: 3324 Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries 3325 and usually the numerical values as well 3326 3327 Any entries below the diagonal are ignored 3328 3329 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ 3330 @*/ 3331 PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3332 { 3333 PetscErrorCode ierr; 3334 3335 PetscFunctionBegin; 3336 ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 3337 PetscFunctionReturn(0); 3338 } 3339 3340 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3341 { 3342 PetscErrorCode ierr; 3343 PetscInt m,N,i,rstart,nnz,Ii,bs,cbs; 3344 PetscInt *indx; 3345 PetscScalar *values; 3346 3347 PetscFunctionBegin; 3348 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 3349 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 3350 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inmat->data; 3351 PetscInt *dnz,*onz,mbs,Nbs,nbs; 3352 PetscInt *bindx,rmax=a->rmax,j; 3353 PetscMPIInt rank,size; 3354 3355 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 3356 mbs = m/bs; Nbs = N/cbs; 3357 if (n == PETSC_DECIDE) { 3358 ierr = PetscSplitOwnershipBlock(comm,cbs,&n,&N); 3359 } 3360 nbs = n/cbs; 3361 3362 ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr); 3363 ierr = MatPreallocateInitialize(comm,mbs,nbs,dnz,onz);CHKERRQ(ierr); /* inline function, output __end and __rstart are used below */ 3364 3365 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3366 ierr = MPI_Comm_rank(comm,&size);CHKERRQ(ierr); 3367 if (rank == size-1) { 3368 /* Check sum(nbs) = Nbs */ 3369 if (__end != Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local block columns %D != global block columns %D",__end,Nbs); 3370 } 3371 3372 rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateInitialize */ 3373 ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 3374 for (i=0; i<mbs; i++) { 3375 ierr = MatGetRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */ 3376 nnz = nnz/bs; 3377 for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs; 3378 ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr); 3379 ierr = MatRestoreRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); 3380 } 3381 ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr); 3382 ierr = PetscFree(bindx);CHKERRQ(ierr); 3383 3384 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3385 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3386 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 3387 ierr = MatSetType(*outmat,MATSBAIJ);CHKERRQ(ierr); 3388 ierr = MatSeqSBAIJSetPreallocation(*outmat,bs,0,dnz);CHKERRQ(ierr); 3389 ierr = MatMPISBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr); 3390 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3391 } 3392 3393 /* numeric phase */ 3394 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 3395 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 3396 3397 ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 3398 for (i=0; i<m; i++) { 3399 ierr = MatGetRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3400 Ii = i + rstart; 3401 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3402 ierr = MatRestoreRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3403 } 3404 ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr); 3405 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3406 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3407 PetscFunctionReturn(0); 3408 } 3409