#include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/
#include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
#include <../src/mat/impls/sbaij/seq/sbaij.h>
#include <petscblaslapack.h>

#if defined(PETSC_HAVE_ELEMENTAL)
PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
#endif
#if defined(PETSC_HAVE_SCALAPACK)
PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*);
#endif

/* This could be moved to matimpl.h */
/*
   Preallocates B using the union of the nonzero patterns of the nm matrices X[].

   Collective on B.

   Input Parameters:
+  B    - the matrix to preallocate; must have the same row layout as every X[i]
.  nm   - number of pattern matrices
.  X    - array of nm matrices whose nonzero structure is scanned row by row
.  symm - if symm[i] is true, each entry (r,c) of X[i] also contributes (c,r),
          so that both triangles of a symmetric pattern get allocated; may be NULL
-  fill - passed through to MatPreallocatorPreallocate() (fill values as well as structure)

   Works by inserting every row of every X[i] into a temporary MATPREALLOCATOR
   matrix and then transferring the collected pattern to B.
*/
static PetscErrorCode MatPreallocateWithMats_Private(Mat B, PetscInt nm, Mat X[], PetscBool symm[], PetscBool fill)
{
  Mat            preallocator;
  PetscInt       r,rstart,rend;
  PetscInt       bs,i,m,n,M,N;
  PetscBool      cong = PETSC_TRUE;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidLogicalCollectiveInt(B,nm,2);
  for (i = 0; i < nm; i++) {
    PetscValidHeaderSpecific(X[i],MAT_CLASSID,3);
    /* all pattern matrices must share B's row layout so MatGetRow() below lines up */
    ierr = PetscLayoutCompare(B->rmap,X[i]->rmap,&cong);CHKERRQ(ierr);
    if (!cong) SETERRQ(PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"Not for different layouts");
  }
  PetscValidLogicalCollectiveBool(B,fill,5);
  ierr = MatGetBlockSize(B,&bs);CHKERRQ(ierr);
  ierr = MatGetSize(B,&M,&N);CHKERRQ(ierr);
  ierr = MatGetLocalSize(B,&m,&n);CHKERRQ(ierr);
  /* the preallocator mirrors B's sizes and block size, and records the inserted pattern */
  ierr = MatCreate(PetscObjectComm((PetscObject)B),&preallocator);CHKERRQ(ierr);
  ierr = MatSetType(preallocator,MATPREALLOCATOR);CHKERRQ(ierr);
  ierr = MatSetBlockSize(preallocator,bs);CHKERRQ(ierr);
  ierr = MatSetSizes(preallocator,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetUp(preallocator);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(preallocator,&rstart,&rend);CHKERRQ(ierr);
  for (r = rstart; r < rend; ++r) {
    PetscInt          ncols;
    const PetscInt    *row;
    const PetscScalar *vals;

    for (i = 0; i < nm; i++) {
      ierr = MatGetRow(X[i],r,&ncols,&row,&vals);CHKERRQ(ierr);
      ierr = MatSetValues(preallocator,1,&r,ncols,row,vals,INSERT_VALUES);CHKERRQ(ierr);
      if (symm && symm[i]) {
        /* insert the transposed entries too, so the full (both-triangle) pattern is allocated */
        ierr = MatSetValues(preallocator,ncols,row,1,&r,vals,INSERT_VALUES);CHKERRQ(ierr);
      }
      ierr = MatRestoreRow(X[i],r,&ncols,&row,&vals);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(preallocator,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(preallocator,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatPreallocatorPreallocate(preallocator,fill,B);CHKERRQ(ierr);
  ierr = MatDestroy(&preallocator);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   Fallback conversion from MPISBAIJ (upper-triangle storage) to any full-storage
   matrix type: each stored entry (r,c) is written to the target at both (r,c)
   and (c,r) (conjugated for Hermitian complex matrices).

   Supports MAT_INITIAL_MATRIX, MAT_REUSE_MATRIX and MAT_INPLACE_MATRIX.
*/
PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Basic(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat            B;
  PetscErrorCode ierr;
  PetscInt       r;

  PetscFunctionBegin;
  if (reuse != MAT_REUSE_MATRIX) {
    PetscBool symm = PETSC_TRUE,isdense;
    PetscInt  bs;

    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
    ierr = MatSetType(B,newtype);CHKERRQ(ierr);
    ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
    ierr = MatSetBlockSize(B,bs);CHKERRQ(ierr);
    ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
    ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
    /* dense targets need no preallocation; sparse ones get the symmetrized pattern of A */
    ierr = PetscObjectTypeCompareAny((PetscObject)B,&isdense,MATSEQDENSE,MATMPIDENSE,MATSEQDENSECUDA,"");CHKERRQ(ierr);
    if (!isdense) {
      ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
      ierr = MatPreallocateWithMats_Private(B,1,&A,&symm,PETSC_TRUE);CHKERRQ(ierr);
      ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
    } else {
      ierr = MatSetUp(B);CHKERRQ(ierr);
    }
  } else {
    B    = *newmat;
    ierr = MatZeroEntries(B);CHKERRQ(ierr);
  }

  ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
  for (r = A->rmap->rstart; r < A->rmap->rend; r++) {
    PetscInt          ncols;
    const PetscInt    *row;
    const PetscScalar *vals;

    ierr = MatGetRow(A,r,&ncols,&row,&vals);CHKERRQ(ierr);
    ierr = MatSetValues(B,1,&r,ncols,row,vals,INSERT_VALUES);CHKERRQ(ierr);
#if defined(PETSC_USE_COMPLEX)
    if (A->hermitian) {
      /* Hermitian: mirror entries must be conjugated, so set them one by one */
      PetscInt i;
      for (i = 0; i < ncols; i++) {
        ierr = MatSetValue(B,row[i],r,PetscConj(vals[i]),INSERT_VALUES);CHKERRQ(ierr);
      }
    } else {
      ierr = MatSetValues(B,ncols,row,1,&r,vals,INSERT_VALUES);CHKERRQ(ierr);
    }
#else
    /* real symmetric: the mirror entries are identical, set the whole column slice at once */
    ierr = MatSetValues(B,ncols,row,1,&r,vals,INSERT_VALUES);CHKERRQ(ierr);
#endif
    ierr = MatRestoreRow(A,r,&ncols,&row,&vals);CHKERRQ(ierr);
  }
  ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  if (reuse == MAT_INPLACE_MATRIX) {
    ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr);
  } else {
    *newmat = B;
  }
  PetscFunctionReturn(0);
}
/* Stores the current numerical values of both the diagonal (A) and
   off-diagonal (B) local blocks so they can be restored later. */
PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Restores the numerical values previously saved by MatStoreValues_MPISBAIJ(). */
PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   Insert/add one scalar (row,col are LOCAL point indices) into the diagonal
   block A of an MPISBAIJ matrix.  Expands to code that relies on the local
   variables declared in MatSetValues_MPISBAIJ() (rp, ap, brow, bcol, low,
   high, _i, bs, bs2, a, aa, ai, aj, aimax, ailen, ierr, ...).
   Uses a binary search narrowed to a short linear scan; reallocates the row
   if the block column is not yet present (unless a->nonew forbids it).
   orow/ocol are the original global indices, used only in error messages.
*/
#define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,orow,ocol) \
  { \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    ierr = PetscArraymove(rp+_i+1,rp+_i,N-_i+1);CHKERRQ(ierr); \
    ierr = PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1));CHKERRQ(ierr); \
    ierr = PetscArrayzero(ap+bs2*_i,bs2);CHKERRQ(ierr);  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
    A->nonzerostate++;\
    a_noinsert:; \
    ailen[brow] = nrow; \
  }

/*
   Same as MatSetValues_SeqSBAIJ_A_Private() but targets the off-diagonal
   block B (variables b, ba, bi, bj, bimax, bilen).  Kept as a separate macro
   because the label names (b_noinsert) must be distinct within one function.
*/
#define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,orow,ocol) \
  { \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    ierr = PetscArraymove(rp+_i+1,rp+_i,N-_i+1);CHKERRQ(ierr); \
    ierr = PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1));CHKERRQ(ierr); \
    ierr = PetscArrayzero(ap+bs2*_i,bs2);CHKERRQ(ierr);  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
    B->nonzerostate++;\
    b_noinsert:; \
    bilen[brow] = nrow; \
  }
/* Only add/insert a(i,j) with i<=j (blocks).
   Any a(i,j) with i>j input by user is ignored or generates an error
*/
/*
   Point-wise MatSetValues() for MPISBAIJ.  Locally owned rows are routed to
   the diagonal block A or the off-diagonal block B via the two macros above;
   rows owned by other processes are buffered in mat->stash for assembly.
   Lower-triangular (block) entries are skipped or rejected depending on
   a->ignore_ltriangular (option -mat_ignore_lower_triangular).
*/
PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  MatScalar      value;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat          A     = baij->A;
  Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ*)(A)->data;
  PetscInt     *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar    *aa   =a->a;

  Mat         B     = baij->B;
  Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
  PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba   =b->a;

  PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar *ap,*bap;

  /* for stash */
  PetscInt  n_loc, *in_loc = NULL;
  MatScalar *v_loc = NULL;

  PetscFunctionBegin;
  if (!baij->donotstash) {
    /* grow the persistent scratch buffers used to filter a stashed row */
    if (n > baij->n_loc) {
      ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
      ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
      ierr = PetscMalloc1(n,&baij->in_loc);CHKERRQ(ierr);
      ierr = PetscMalloc1(n,&baij->v_loc);CHKERRQ(ierr);

      baij->n_loc = n;
    }
    in_loc = baij->in_loc;
    v_loc  = baij->v_loc;
  }

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
    if (PetscUnlikelyDebug(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
    if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
      row = im[i] - rstart_orig;                     /* local row index */
      for (j=0; j<n; j++) {
        if (im[i]/bs > in[j]/bs) {
          if (a->ignore_ltriangular) {
            continue; /* ignore lower triangular blocks */
          } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
        }
        if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */
          col  = in[j] - cstart_orig;                    /* local col index */
          brow = row/bs; bcol = col/bs;
          if (brow > bcol) continue; /* ignore lower triangular blocks of A */
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,im[i],in[j]);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
        else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
        else { /* off-diag entry (B) */
          if (mat->was_assembled) {
            /* translate the global block column through the colmap into B's compressed numbering */
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
              /* block column not present yet: disassemble so new off-process columns can be added */
              ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B    = baij->B;
              b    = (Mat_SeqBAIJ*)(B)->data;
              bimax= b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba   = b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,im[i],in[j]);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else { /* off processor entry */
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        /* drop lower-triangular entries before stashing, then stash the filtered row */
        n_loc          = 0;
        for (j=0; j<n; j++) {
          if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
          in_loc[n_loc] = in[j];
          if (roworiented) {
            v_loc[n_loc] = v[i*n+j];
          } else {
            v_loc[n_loc] = v[j*m+i];
          }
          n_loc++;
        }
        ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}

/*
   Insert/add one bs x bs block at block position (row,col) of a SeqSBAIJ
   matrix.  row/col are LOCAL block indices; orow/ocol are the original global
   block indices used in error messages.  v is the dense block, interpreted
   row- or column-oriented according to a->roworiented.  Lower-triangular
   blocks (col < row) are ignored or rejected per a->ignore_ltriangular.
*/
PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
{
  Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
  PetscInt          *imax =a->imax,*ai=a->i,*ailen=a->ilen;
  PetscInt          *aj   =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
  PetscBool         roworiented=a->roworiented;
  const PetscScalar *value = v;
  MatScalar         *ap,*aa = a->a,*bap;

  PetscFunctionBegin;
  if (col < row) {
    if (a->ignore_ltriangular) PetscFunctionReturn(0); /* ignore lower triangular block */
    else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
  }
  rp   = aj + ai[row];
  ap   = aa + bs2*ai[row];
  rmax = imax[row];
  nrow = ailen[row];
  value = v;
  low  = 0;
  high = nrow;

  /* binary search narrowed to a short linear scan for the block column */
  while (high-low > 7) {
    t = (low+high)/2;
    if (rp[t] > col) high = t;
    else             low  = t;
  }
  for (i=low; i<high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      /* block already present: accumulate or overwrite in place.
         Blocks are stored column-major; the jj stride transposes row-oriented input */
      bap = ap + bs2*i;
      if (roworiented) {
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] += *value++;
            }
          }
        } else {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] = *value++;
            }
          }
        }
      } else {
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++) {
            for (jj=0; jj<bs; jj++) {
              *bap++ += *value++;
            }
          }
        } else {
          for (ii=0; ii<bs; ii++) {
            for (jj=0; jj<bs; jj++) {
              *bap++ = *value++;
            }
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2;
  if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new block index nonzero block (%D, %D) in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
  N = nrow++ - 1; high++;
  /* shift up all the later entries in this row */
  ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr);
  ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr);
  rp[i] = col;
  bap   = ap + bs2*i;
  if (roworiented) {
    for (ii=0; ii<bs; ii++) {
      for (jj=ii; jj<bs2; jj+=bs) {
        bap[jj] = *value++;
      }
    }
  } else {
    for (ii=0; ii<bs; ii++) {
      for (jj=0; jj<bs; jj++) {
        *bap++ = *value++;
      }
    }
  }
  noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(0);
}
/*
   This routine is exactly duplicated in mpibaij.c
*/
/*
   Insert/add one bs x bs block at block position (row,col) of a SeqBAIJ
   matrix (used here for the off-diagonal block B, which is non-symmetric
   storage, so no triangular filtering is done).  row/col are LOCAL block
   indices; orow/ocol are original global block indices for error messages.
*/
PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
{
  Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
  PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
  PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
  PetscErrorCode    ierr;
  PetscInt          *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
  PetscBool         roworiented=a->roworiented;
  const PetscScalar *value = v;
  MatScalar         *ap,*aa = a->a,*bap;

  PetscFunctionBegin;
  rp   = aj + ai[row];
  ap   = aa + bs2*ai[row];
  rmax = imax[row];
  nrow = ailen[row];
  low  = 0;
  high = nrow;
  value = v;
  /* binary search narrowed to a short linear scan for the block column */
  while (high-low > 7) {
    t = (low+high)/2;
    if (rp[t] > col) high = t;
    else             low  = t;
  }
  for (i=low; i<high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      /* block found: accumulate or overwrite; column-major block storage,
         so row-oriented input is transposed via the jj stride */
      bap = ap + bs2*i;
      if (roworiented) {
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] += *value++;
            }
          }
        } else {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] = *value++;
            }
          }
        }
      } else {
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++,value+=bs) {
            for (jj=0; jj<bs; jj++) {
              bap[jj] += value[jj];
            }
            bap += bs;
          }
        } else {
          for (ii=0; ii<bs; ii++,value+=bs) {
            for (jj=0; jj<bs; jj++) {
              bap[jj] = value[jj];
            }
            bap += bs;
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2;
  if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%D, %D) in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
  N = nrow++ - 1; high++;
  /* shift up all the later entries in this row */
  ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr);
  ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr);
  rp[i] = col;
  bap   = ap + bs2*i;
  if (roworiented) {
    for (ii=0; ii<bs; ii++) {
      for (jj=ii; jj<bs2; jj+=bs) {
        bap[jj] = *value++;
      }
    }
  } else {
    for (ii=0; ii<bs; ii++) {
      for (jj=0; jj<bs; jj++) {
        *bap++ = *value++;
      }
    }
  }
  noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(0);
}
/*
    This routine could be optimized by removing the need for the block copy below and passing stride information
  to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ()
*/
/*
   Blocked MatSetValues() for MPISBAIJ: im[]/in[] are GLOBAL block indices and
   v[] holds m*n dense bs x bs blocks.  Each block is copied (unless no copy
   is needed) into baij->barray and dispatched to the diagonal (SBAIJ) or
   off-diagonal (BAIJ) local matrix; off-process rows are stashed in
   mat->bstash.  Lower-triangular blocks are ignored or rejected per the
   ignore_ltriangular option.
*/
PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray =baij->barray;
  PetscBool       roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular;
  PetscErrorCode  ierr;
  PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    /* lazily allocate the persistent one-block staging buffer */
    ierr = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  /* distance (in scalars) between consecutive rows/columns of one block inside v */
  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
    if (PetscUnlikelyDebug(im[i] >= baij->Mbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %D max %D",im[i],baij->Mbs-1);
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        if (im[i] > in[j]) {
          if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
          else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
        }
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*) v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*) v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++ = *value++;
            }
          }
          barray -= bs2; /* rewind to the start of the staged block */
        }

        if (in[j] >= cstart && in[j] < cend) {
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        } else if (in[j] < 0) continue;
        else if (PetscUnlikelyDebug(in[j] >= baij->Nbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %D max %D",in[j],baij->Nbs-1);
        else {
          if (mat->was_assembled) {
            /* translate the global block column into B's compressed numbering */
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined(PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              /* NOTE(review): baij->A holds a Mat_SeqSBAIJ; the Mat_SeqBAIJ cast to read
                 nonew matches the historical code here — confirm against mpibaij.c */
              ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            }
          } else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
/*
   Point-wise MatGetValues() for MPISBAIJ.  Only rows owned by this process
   are supported; columns in the diagonal block are read from baij->A, others
   are translated through the colmap and read from baij->B (0.0 is returned
   for positions outside B's off-diagonal pattern).
*/
PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          col  = idxn[j] - bscstart;
          ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          /* block column not in B's pattern (or stale colmap entry): value is zero */
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

/*
   Computes matrix norms for MPISBAIJ.  Because only the upper triangle is
   stored, the Frobenius norm counts off-diagonal-part entries twice
   (sum[0] + 2*sum[1]) and the 1-/infinity-norms (identical for a symmetric
   matrix) accumulate every off-diagonal block into both its row and its
   column sum before a global reduction.
*/
PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscReal      sum[2],*lnorm2;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      ierr = PetscMalloc1(2,&lnorm2);CHKERRQ(ierr);
      ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of norm(A) */
      ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;            /* square of norm(B) */
      ierr = MPIU_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      /* B's entries appear twice in the full symmetric matrix, hence the factor 2 */
      *norm = PetscSqrtReal(sum[0] + 2*sum[1]);
      ierr  = PetscFree(lnorm2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
      Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
      Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
      PetscReal    *rsum,*rsum2,vabs;
      PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
      PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
      MatScalar    *v;

      ierr = PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2);CHKERRQ(ierr);
      ierr = PetscArrayzero(rsum,mat->cmap->N);CHKERRQ(ierr);
      /* Amat */
      v = amat->a; jj = amat->j;
      for (brow=0; brow<mbs; brow++) {
        grow = bs*(rstart + brow);
        nz   = amat->i[brow+1] - amat->i[brow];
        for (bcol=0; bcol<nz; bcol++) {
          gcol = bs*(rstart + *jj); jj++;
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              vabs            = PetscAbsScalar(*v); v++;
              rsum[gcol+col] += vabs;
              /* non-diagonal block: also counts toward the mirrored row */
              if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
            }
          }
        }
        ierr = PetscLogFlops(nz*bs*bs);CHKERRQ(ierr);
      }
      /* Bmat */
      v = bmat->a; jj = bmat->j;
      for (brow=0; brow<mbs; brow++) {
        grow = bs*(rstart + brow);
        nz   = bmat->i[brow+1] - bmat->i[brow];
        for (bcol=0; bcol<nz; bcol++) {
          gcol = bs*garray[*jj]; jj++;
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              vabs            = PetscAbsScalar(*v); v++;
              rsum[gcol+col] += vabs;
              rsum[grow+row] += vabs;
            }
          }
        }
        ierr = PetscLogFlops(nz*bs*bs);CHKERRQ(ierr);
      }
      ierr  = MPIU_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *norm = 0.0;
      for (col=0; col<mat->cmap->N; col++) {
        if (rsum2[col] > *norm) *norm = rsum2[col];
      }
      ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr);
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
  }
  PetscFunctionReturn(0);
}
amat->i[brow]; 703 for (bcol=0; bcol<nz; bcol++) { 704 gcol = bs*(rstart + *jj); jj++; 705 for (col=0; col<bs; col++) { 706 for (row=0; row<bs; row++) { 707 vabs = PetscAbsScalar(*v); v++; 708 rsum[gcol+col] += vabs; 709 /* non-diagonal block */ 710 if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs; 711 } 712 } 713 } 714 ierr = PetscLogFlops(nz*bs*bs);CHKERRQ(ierr); 715 } 716 /* Bmat */ 717 v = bmat->a; jj = bmat->j; 718 for (brow=0; brow<mbs; brow++) { 719 grow = bs*(rstart + brow); 720 nz = bmat->i[brow+1] - bmat->i[brow]; 721 for (bcol=0; bcol<nz; bcol++) { 722 gcol = bs*garray[*jj]; jj++; 723 for (col=0; col<bs; col++) { 724 for (row=0; row<bs; row++) { 725 vabs = PetscAbsScalar(*v); v++; 726 rsum[gcol+col] += vabs; 727 rsum[grow+row] += vabs; 728 } 729 } 730 } 731 ierr = PetscLogFlops(nz*bs*bs);CHKERRQ(ierr); 732 } 733 ierr = MPIU_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 734 *norm = 0.0; 735 for (col=0; col<mat->cmap->N; col++) { 736 if (rsum2[col] > *norm) *norm = rsum2[col]; 737 } 738 ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr); 739 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet"); 740 } 741 PetscFunctionReturn(0); 742 } 743 744 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode) 745 { 746 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 747 PetscErrorCode ierr; 748 PetscInt nstash,reallocs; 749 750 PetscFunctionBegin; 751 if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 752 753 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 754 ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr); 755 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 756 ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 757 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 758 ierr = 
PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 759 PetscFunctionReturn(0); 760 } 761 762 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode) 763 { 764 Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data; 765 Mat_SeqSBAIJ *a =(Mat_SeqSBAIJ*)baij->A->data; 766 PetscErrorCode ierr; 767 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 768 PetscInt *row,*col; 769 PetscBool other_disassembled; 770 PetscMPIInt n; 771 PetscBool r1,r2,r3; 772 MatScalar *val; 773 774 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 775 PetscFunctionBegin; 776 if (!baij->donotstash && !mat->nooffprocentries) { 777 while (1) { 778 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 779 if (!flg) break; 780 781 for (i=0; i<n;) { 782 /* Now identify the consecutive vals belonging to the same row */ 783 for (j=i,rstart=row[j]; j<n; j++) { 784 if (row[j] != rstart) break; 785 } 786 if (j < n) ncols = j-i; 787 else ncols = n-i; 788 /* Now assemble all these values with a single function call */ 789 ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 790 i = j; 791 } 792 } 793 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 794 /* Now process the block-stash. 
Since the values are stashed column-oriented, 795 set the roworiented flag to column oriented, and after MatSetValues() 796 restore the original flags */ 797 r1 = baij->roworiented; 798 r2 = a->roworiented; 799 r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented; 800 801 baij->roworiented = PETSC_FALSE; 802 a->roworiented = PETSC_FALSE; 803 804 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworinted */ 805 while (1) { 806 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 807 if (!flg) break; 808 809 for (i=0; i<n;) { 810 /* Now identify the consecutive vals belonging to the same row */ 811 for (j=i,rstart=row[j]; j<n; j++) { 812 if (row[j] != rstart) break; 813 } 814 if (j < n) ncols = j-i; 815 else ncols = n-i; 816 ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode);CHKERRQ(ierr); 817 i = j; 818 } 819 } 820 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 821 822 baij->roworiented = r1; 823 a->roworiented = r2; 824 825 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworinted */ 826 } 827 828 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 829 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 830 831 /* determine if any processor has disassembled, if so we must 832 also disassemble ourselfs, in order that we may reassemble. 
*/ 833 /* 834 if nonzero structure of submatrix B cannot change then we know that 835 no processor disassembled thus we can skip this stuff 836 */ 837 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 838 ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 839 if (mat->was_assembled && !other_disassembled) { 840 ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 841 } 842 } 843 844 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 845 ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */ 846 } 847 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 848 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 849 850 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 851 852 baij->rowvalues = 0; 853 854 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 855 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 856 PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate; 857 ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 858 } 859 PetscFunctionReturn(0); 860 } 861 862 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 863 #include <petscdraw.h> 864 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 865 { 866 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 867 PetscErrorCode ierr; 868 PetscInt bs = mat->rmap->bs; 869 PetscMPIInt rank = baij->rank; 870 PetscBool iascii,isdraw; 871 PetscViewer sviewer; 872 PetscViewerFormat format; 873 874 PetscFunctionBegin; 875 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 876 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 
877 if (iascii) { 878 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 879 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 880 MatInfo info; 881 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 882 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 883 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 884 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %g\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(double)info.memory);CHKERRQ(ierr); 885 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 886 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 887 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 888 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 889 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 890 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 891 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 892 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 893 PetscFunctionReturn(0); 894 } else if (format == PETSC_VIEWER_ASCII_INFO) { 895 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 896 PetscFunctionReturn(0); 897 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 898 PetscFunctionReturn(0); 899 } 900 } 901 902 if (isdraw) { 903 PetscDraw draw; 904 PetscBool isnull; 905 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 906 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 907 if (isnull) PetscFunctionReturn(0); 908 } 909 910 { 911 /* assemble the entire matrix onto first processor. 
*/ 912 Mat A; 913 Mat_SeqSBAIJ *Aloc; 914 Mat_SeqBAIJ *Bloc; 915 PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 916 MatScalar *a; 917 const char *matname; 918 919 /* Should this be the same type as mat? */ 920 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 921 if (!rank) { 922 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 923 } else { 924 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 925 } 926 ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 927 ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr); 928 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 929 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 930 931 /* copy over the A part */ 932 Aloc = (Mat_SeqSBAIJ*)baij->A->data; 933 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 934 ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr); 935 936 for (i=0; i<mbs; i++) { 937 rvals[0] = bs*(baij->rstartbs + i); 938 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 939 for (j=ai[i]; j<ai[i+1]; j++) { 940 col = (baij->cstartbs+aj[j])*bs; 941 for (k=0; k<bs; k++) { 942 ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 943 col++; 944 a += bs; 945 } 946 } 947 } 948 /* copy over the B part */ 949 Bloc = (Mat_SeqBAIJ*)baij->B->data; 950 ai = Bloc->i; aj = Bloc->j; a = Bloc->a; 951 for (i=0; i<mbs; i++) { 952 953 rvals[0] = bs*(baij->rstartbs + i); 954 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 955 for (j=ai[i]; j<ai[i+1]; j++) { 956 col = baij->garray[aj[j]]*bs; 957 for (k=0; k<bs; k++) { 958 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 959 col++; 960 a += bs; 961 } 962 } 963 } 964 ierr = PetscFree(rvals);CHKERRQ(ierr); 965 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 966 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 967 /* 968 Everyone has to call to draw the matrix since the graphics waits are 969 
synchronized across all processors that share the PetscDraw object 970 */ 971 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 972 ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr); 973 if (!rank) { 974 ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,matname);CHKERRQ(ierr); 975 ierr = MatView_SeqSBAIJ(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 976 } 977 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 978 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 979 ierr = MatDestroy(&A);CHKERRQ(ierr); 980 } 981 PetscFunctionReturn(0); 982 } 983 984 /* Used for both MPIBAIJ and MPISBAIJ matrices */ 985 #define MatView_MPISBAIJ_Binary MatView_MPIBAIJ_Binary 986 987 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer) 988 { 989 PetscErrorCode ierr; 990 PetscBool iascii,isdraw,issocket,isbinary; 991 992 PetscFunctionBegin; 993 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 994 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 995 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 996 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 997 if (iascii || isdraw || issocket) { 998 ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 999 } else if (isbinary) { 1000 ierr = MatView_MPISBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1001 } 1002 PetscFunctionReturn(0); 1003 } 1004 1005 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat) 1006 { 1007 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1008 PetscErrorCode ierr; 1009 1010 PetscFunctionBegin; 1011 #if defined(PETSC_USE_LOG) 1012 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N); 1013 #endif 1014 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1015 ierr = 
MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 1016 ierr = MatDestroy(&baij->A);CHKERRQ(ierr); 1017 ierr = MatDestroy(&baij->B);CHKERRQ(ierr); 1018 #if defined(PETSC_USE_CTABLE) 1019 ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr); 1020 #else 1021 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 1022 #endif 1023 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 1024 ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr); 1025 ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr); 1026 ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr); 1027 ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr); 1028 ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr); 1029 ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr); 1030 ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr); 1031 ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr); 1032 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 1033 ierr = PetscFree(baij->barray);CHKERRQ(ierr); 1034 ierr = PetscFree(baij->hd);CHKERRQ(ierr); 1035 ierr = VecDestroy(&baij->diag);CHKERRQ(ierr); 1036 ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr); 1037 ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr); 1038 #if defined(PETSC_USE_REAL_MAT_SINGLE) 1039 ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr); 1040 #endif 1041 ierr = PetscFree(baij->in_loc);CHKERRQ(ierr); 1042 ierr = PetscFree(baij->v_loc);CHKERRQ(ierr); 1043 ierr = PetscFree(baij->rangebs);CHKERRQ(ierr); 1044 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1045 1046 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1047 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1048 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1049 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1050 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1051 #if defined(PETSC_HAVE_ELEMENTAL) 1052 ierr = 
PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_elemental_C",NULL);CHKERRQ(ierr); 1053 #endif 1054 #if defined(PETSC_HAVE_SCALAPACK) 1055 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_scalapack_C",NULL);CHKERRQ(ierr); 1056 #endif 1057 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpiaij_C",NULL);CHKERRQ(ierr); 1058 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpibaij_C",NULL);CHKERRQ(ierr); 1059 PetscFunctionReturn(0); 1060 } 1061 1062 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy) 1063 { 1064 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1065 PetscErrorCode ierr; 1066 PetscInt mbs=a->mbs,bs=A->rmap->bs; 1067 PetscScalar *from; 1068 const PetscScalar *x; 1069 1070 PetscFunctionBegin; 1071 /* diagonal part */ 1072 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 1073 ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr); 1074 1075 /* subdiagonal part */ 1076 ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 1077 1078 /* copy x into the vec slvec0 */ 1079 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 1080 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1081 1082 ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr); 1083 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 1084 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1085 1086 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1087 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1088 /* supperdiagonal part */ 1089 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 1090 PetscFunctionReturn(0); 1091 } 1092 1093 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy) 1094 { 1095 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1096 PetscErrorCode ierr; 1097 PetscInt mbs=a->mbs,bs=A->rmap->bs; 1098 PetscScalar *from; 1099 const PetscScalar *x; 1100 1101 PetscFunctionBegin; 
1102 /* diagonal part */ 1103 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 1104 ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr); 1105 1106 /* subdiagonal part */ 1107 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 1108 1109 /* copy x into the vec slvec0 */ 1110 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 1111 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1112 1113 ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr); 1114 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 1115 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1116 1117 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1118 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1119 /* supperdiagonal part */ 1120 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 1121 PetscFunctionReturn(0); 1122 } 1123 1124 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy) 1125 { 1126 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1127 PetscErrorCode ierr; 1128 PetscInt nt; 1129 1130 PetscFunctionBegin; 1131 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1132 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1133 1134 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1135 if (nt != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 1136 1137 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1138 /* do diagonal part */ 1139 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1140 /* do supperdiagonal part */ 1141 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1142 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1143 /* do subdiagonal part */ 1144 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1145 ierr = 
VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1146 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1147 PetscFunctionReturn(0); 1148 } 1149 1150 PetscErrorCode MatMultAdd_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy,Vec zz) 1151 { 1152 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1153 PetscErrorCode ierr; 1154 PetscInt mbs=a->mbs,bs=A->rmap->bs; 1155 PetscScalar *from,zero=0.0; 1156 const PetscScalar *x; 1157 1158 PetscFunctionBegin; 1159 /* diagonal part */ 1160 ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr); 1161 ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr); 1162 1163 /* subdiagonal part */ 1164 ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 1165 1166 /* copy x into the vec slvec0 */ 1167 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 1168 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1169 ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr); 1170 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 1171 1172 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1173 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1174 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1175 1176 /* supperdiagonal part */ 1177 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr); 1178 PetscFunctionReturn(0); 1179 } 1180 1181 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1182 { 1183 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1184 PetscErrorCode ierr; 1185 PetscInt mbs=a->mbs,bs=A->rmap->bs; 1186 PetscScalar *from,zero=0.0; 1187 const PetscScalar *x; 1188 1189 PetscFunctionBegin; 1190 /* diagonal part */ 1191 ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr); 1192 ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr); 1193 1194 /* subdiagonal part */ 1195 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 1196 1197 /* copy x 
into the vec slvec0 */ 1198 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 1199 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1200 ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr); 1201 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 1202 1203 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1204 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1205 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1206 1207 /* supperdiagonal part */ 1208 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr); 1209 PetscFunctionReturn(0); 1210 } 1211 1212 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz) 1213 { 1214 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1215 PetscErrorCode ierr; 1216 1217 PetscFunctionBegin; 1218 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1219 /* do diagonal part */ 1220 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1221 /* do supperdiagonal part */ 1222 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1223 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1224 1225 /* do subdiagonal part */ 1226 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1227 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1228 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1229 PetscFunctionReturn(0); 1230 } 1231 1232 /* 1233 This only works correctly for square matrices where the subblock A->A is the 1234 diagonal block 1235 */ 1236 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v) 1237 { 1238 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1239 PetscErrorCode ierr; 1240 1241 PetscFunctionBegin; 1242 /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */ 1243 ierr = 
MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1244 PetscFunctionReturn(0); 1245 } 1246 1247 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa) 1248 { 1249 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1250 PetscErrorCode ierr; 1251 1252 PetscFunctionBegin; 1253 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1254 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1255 PetscFunctionReturn(0); 1256 } 1257 1258 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1259 { 1260 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 1261 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1262 PetscErrorCode ierr; 1263 PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1264 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend; 1265 PetscInt *cmap,*idx_p,cstart = mat->rstartbs; 1266 1267 PetscFunctionBegin; 1268 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1269 mat->getrowactive = PETSC_TRUE; 1270 1271 if (!mat->rowvalues && (idx || v)) { 1272 /* 1273 allocate enough space to hold information from the longest row. 
1274 */ 1275 Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data; 1276 Mat_SeqBAIJ *Ba = (Mat_SeqBAIJ*)mat->B->data; 1277 PetscInt max = 1,mbs = mat->mbs,tmp; 1278 for (i=0; i<mbs; i++) { 1279 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */ 1280 if (max < tmp) max = tmp; 1281 } 1282 ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr); 1283 } 1284 1285 if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows"); 1286 lrow = row - brstart; /* local row index */ 1287 1288 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1289 if (!v) {pvA = 0; pvB = 0;} 1290 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1291 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1292 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1293 nztot = nzA + nzB; 1294 1295 cmap = mat->garray; 1296 if (v || idx) { 1297 if (nztot) { 1298 /* Sort by increasing column numbers, assuming A and B already sorted */ 1299 PetscInt imark = -1; 1300 if (v) { 1301 *v = v_p = mat->rowvalues; 1302 for (i=0; i<nzB; i++) { 1303 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1304 else break; 1305 } 1306 imark = i; 1307 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1308 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1309 } 1310 if (idx) { 1311 *idx = idx_p = mat->rowindices; 1312 if (imark > -1) { 1313 for (i=0; i<imark; i++) { 1314 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1315 } 1316 } else { 1317 for (i=0; i<nzB; i++) { 1318 if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1319 else break; 1320 } 1321 imark = i; 1322 } 1323 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1324 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1325 } 1326 } else { 1327 if (idx) *idx = 0; 1328 if (v) *v = 0; 1329 } 1330 } 1331 *nz = nztot; 1332 ierr = 
(*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1333 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1334 PetscFunctionReturn(0); 1335 } 1336 1337 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1338 { 1339 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1340 1341 PetscFunctionBegin; 1342 if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1343 baij->getrowactive = PETSC_FALSE; 1344 PetscFunctionReturn(0); 1345 } 1346 1347 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A) 1348 { 1349 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1350 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1351 1352 PetscFunctionBegin; 1353 aA->getrow_utriangular = PETSC_TRUE; 1354 PetscFunctionReturn(0); 1355 } 1356 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A) 1357 { 1358 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1359 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1360 1361 PetscFunctionBegin; 1362 aA->getrow_utriangular = PETSC_FALSE; 1363 PetscFunctionReturn(0); 1364 } 1365 1366 PetscErrorCode MatRealPart_MPISBAIJ(Mat A) 1367 { 1368 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1369 PetscErrorCode ierr; 1370 1371 PetscFunctionBegin; 1372 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1373 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1374 PetscFunctionReturn(0); 1375 } 1376 1377 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A) 1378 { 1379 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1380 PetscErrorCode ierr; 1381 1382 PetscFunctionBegin; 1383 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1384 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1385 PetscFunctionReturn(0); 1386 } 1387 1388 /* Check if isrow is a subset of iscol_local, called by MatCreateSubMatrix_MPISBAIJ() 1389 Input: isrow - distributed(parallel), 1390 iscol_local - locally owned (seq) 1391 */ 1392 PetscErrorCode ISEqual_private(IS isrow,IS iscol_local,PetscBool *flg) 1393 { 
1394 PetscErrorCode ierr; 1395 PetscInt sz1,sz2,*a1,*a2,i,j,k,nmatch; 1396 const PetscInt *ptr1,*ptr2; 1397 1398 PetscFunctionBegin; 1399 ierr = ISGetLocalSize(isrow,&sz1);CHKERRQ(ierr); 1400 ierr = ISGetLocalSize(iscol_local,&sz2);CHKERRQ(ierr); 1401 if (sz1 > sz2) { 1402 *flg = PETSC_FALSE; 1403 PetscFunctionReturn(0); 1404 } 1405 1406 ierr = ISGetIndices(isrow,&ptr1);CHKERRQ(ierr); 1407 ierr = ISGetIndices(iscol_local,&ptr2);CHKERRQ(ierr); 1408 1409 ierr = PetscMalloc1(sz1,&a1);CHKERRQ(ierr); 1410 ierr = PetscMalloc1(sz2,&a2);CHKERRQ(ierr); 1411 ierr = PetscArraycpy(a1,ptr1,sz1);CHKERRQ(ierr); 1412 ierr = PetscArraycpy(a2,ptr2,sz2);CHKERRQ(ierr); 1413 ierr = PetscSortInt(sz1,a1);CHKERRQ(ierr); 1414 ierr = PetscSortInt(sz2,a2);CHKERRQ(ierr); 1415 1416 nmatch=0; 1417 k = 0; 1418 for (i=0; i<sz1; i++){ 1419 for (j=k; j<sz2; j++){ 1420 if (a1[i] == a2[j]) { 1421 k = j; nmatch++; 1422 break; 1423 } 1424 } 1425 } 1426 ierr = ISRestoreIndices(isrow,&ptr1);CHKERRQ(ierr); 1427 ierr = ISRestoreIndices(iscol_local,&ptr2);CHKERRQ(ierr); 1428 ierr = PetscFree(a1);CHKERRQ(ierr); 1429 ierr = PetscFree(a2);CHKERRQ(ierr); 1430 if (nmatch < sz1) { 1431 *flg = PETSC_FALSE; 1432 } else { 1433 *flg = PETSC_TRUE; 1434 } 1435 PetscFunctionReturn(0); 1436 } 1437 1438 PetscErrorCode MatCreateSubMatrix_MPISBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 1439 { 1440 PetscErrorCode ierr; 1441 IS iscol_local; 1442 PetscInt csize; 1443 PetscBool isequal; 1444 1445 PetscFunctionBegin; 1446 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1447 if (call == MAT_REUSE_MATRIX) { 1448 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 1449 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 1450 } else { 1451 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 1452 ierr = ISEqual_private(isrow,iscol_local,&isequal);CHKERRQ(ierr); 1453 if (!isequal) 
SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isrow"); 1454 } 1455 1456 /* now call MatCreateSubMatrix_MPIBAIJ() */ 1457 ierr = MatCreateSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 1458 if (call == MAT_INITIAL_MATRIX) { 1459 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 1460 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 1461 } 1462 PetscFunctionReturn(0); 1463 } 1464 1465 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A) 1466 { 1467 Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data; 1468 PetscErrorCode ierr; 1469 1470 PetscFunctionBegin; 1471 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1472 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1473 PetscFunctionReturn(0); 1474 } 1475 1476 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1477 { 1478 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data; 1479 Mat A = a->A,B = a->B; 1480 PetscErrorCode ierr; 1481 PetscLogDouble isend[5],irecv[5]; 1482 1483 PetscFunctionBegin; 1484 info->block_size = (PetscReal)matin->rmap->bs; 1485 1486 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1487 1488 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1489 isend[3] = info->memory; isend[4] = info->mallocs; 1490 1491 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1492 1493 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1494 isend[3] += info->memory; isend[4] += info->mallocs; 1495 if (flag == MAT_LOCAL) { 1496 info->nz_used = isend[0]; 1497 info->nz_allocated = isend[1]; 1498 info->nz_unneeded = isend[2]; 1499 info->memory = isend[3]; 1500 info->mallocs = isend[4]; 1501 } else if (flag == MAT_GLOBAL_MAX) { 1502 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1503 1504 info->nz_used = irecv[0]; 1505 info->nz_allocated = irecv[1]; 1506 info->nz_unneeded 
= irecv[2]; 1507 info->memory = irecv[3]; 1508 info->mallocs = irecv[4]; 1509 } else if (flag == MAT_GLOBAL_SUM) { 1510 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1511 1512 info->nz_used = irecv[0]; 1513 info->nz_allocated = irecv[1]; 1514 info->nz_unneeded = irecv[2]; 1515 info->memory = irecv[3]; 1516 info->mallocs = irecv[4]; 1517 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1518 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1519 info->fill_ratio_needed = 0; 1520 info->factor_mallocs = 0; 1521 PetscFunctionReturn(0); 1522 } 1523 1524 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg) 1525 { 1526 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1527 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1528 PetscErrorCode ierr; 1529 1530 PetscFunctionBegin; 1531 switch (op) { 1532 case MAT_NEW_NONZERO_LOCATIONS: 1533 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1534 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1535 case MAT_KEEP_NONZERO_PATTERN: 1536 case MAT_SUBMAT_SINGLEIS: 1537 case MAT_NEW_NONZERO_LOCATION_ERR: 1538 MatCheckPreallocated(A,1); 1539 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1540 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1541 break; 1542 case MAT_ROW_ORIENTED: 1543 MatCheckPreallocated(A,1); 1544 a->roworiented = flg; 1545 1546 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1547 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1548 break; 1549 case MAT_NEW_DIAGONALS: 1550 case MAT_SORTED_FULL: 1551 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1552 break; 1553 case MAT_IGNORE_OFF_PROC_ENTRIES: 1554 a->donotstash = flg; 1555 break; 1556 case MAT_USE_HASH_TABLE: 1557 a->ht_flag = flg; 1558 break; 1559 case MAT_HERMITIAN: 1560 MatCheckPreallocated(A,1); 1561 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1562 #if defined(PETSC_USE_COMPLEX) 1563 if (flg) { /* need different 
mat-vec ops */ 1564 A->ops->mult = MatMult_MPISBAIJ_Hermitian; 1565 A->ops->multadd = MatMultAdd_MPISBAIJ_Hermitian; 1566 A->ops->multtranspose = NULL; 1567 A->ops->multtransposeadd = NULL; 1568 A->symmetric = PETSC_FALSE; 1569 } 1570 #endif 1571 break; 1572 case MAT_SPD: 1573 case MAT_SYMMETRIC: 1574 MatCheckPreallocated(A,1); 1575 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1576 #if defined(PETSC_USE_COMPLEX) 1577 if (flg) { /* restore to use default mat-vec ops */ 1578 A->ops->mult = MatMult_MPISBAIJ; 1579 A->ops->multadd = MatMultAdd_MPISBAIJ; 1580 A->ops->multtranspose = MatMult_MPISBAIJ; 1581 A->ops->multtransposeadd = MatMultAdd_MPISBAIJ; 1582 } 1583 #endif 1584 break; 1585 case MAT_STRUCTURALLY_SYMMETRIC: 1586 MatCheckPreallocated(A,1); 1587 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1588 break; 1589 case MAT_SYMMETRY_ETERNAL: 1590 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric"); 1591 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1592 break; 1593 case MAT_IGNORE_LOWER_TRIANGULAR: 1594 aA->ignore_ltriangular = flg; 1595 break; 1596 case MAT_ERROR_LOWER_TRIANGULAR: 1597 aA->ignore_ltriangular = flg; 1598 break; 1599 case MAT_GETROW_UPPERTRIANGULAR: 1600 aA->getrow_utriangular = flg; 1601 break; 1602 default: 1603 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1604 } 1605 PetscFunctionReturn(0); 1606 } 1607 1608 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B) 1609 { 1610 PetscErrorCode ierr; 1611 1612 PetscFunctionBegin; 1613 if (reuse == MAT_INITIAL_MATRIX) { 1614 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 1615 } else if (reuse == MAT_REUSE_MATRIX) { 1616 ierr = MatCopy(A,*B,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1617 } 1618 PetscFunctionReturn(0); 1619 } 1620 1621 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr) 1622 { 1623 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1624 Mat a = baij->A, b=baij->B; 1625 PetscErrorCode ierr; 1626 
PetscInt nv,m,n; 1627 PetscBool flg; 1628 1629 PetscFunctionBegin; 1630 if (ll != rr) { 1631 ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr); 1632 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n"); 1633 } 1634 if (!ll) PetscFunctionReturn(0); 1635 1636 ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr); 1637 if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n); 1638 1639 ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr); 1640 if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size"); 1641 1642 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1643 1644 /* left diagonalscale the off-diagonal part */ 1645 ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr); 1646 1647 /* scale the diagonal part */ 1648 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1649 1650 /* right diagonalscale the off-diagonal part */ 1651 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1652 ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr); 1653 PetscFunctionReturn(0); 1654 } 1655 1656 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A) 1657 { 1658 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1659 PetscErrorCode ierr; 1660 1661 PetscFunctionBegin; 1662 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1663 PetscFunctionReturn(0); 1664 } 1665 1666 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*); 1667 1668 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool *flag) 1669 { 1670 Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data; 1671 Mat a,b,c,d; 1672 PetscBool flg; 1673 PetscErrorCode ierr; 1674 1675 PetscFunctionBegin; 1676 a = matA->A; b = matA->B; 1677 c = matB->A; d = matB->B; 1678 1679 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1680 if (flg) { 1681 ierr = 
MatEqual(b,d,&flg);CHKERRQ(ierr); 1682 } 1683 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1684 PetscFunctionReturn(0); 1685 } 1686 1687 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str) 1688 { 1689 PetscErrorCode ierr; 1690 PetscBool isbaij; 1691 1692 PetscFunctionBegin; 1693 ierr = PetscObjectTypeCompareAny((PetscObject)B,&isbaij,MATSEQSBAIJ,MATMPISBAIJ,"");CHKERRQ(ierr); 1694 if (!isbaij) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"Not for matrix type %s",((PetscObject)B)->type_name); 1695 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1696 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1697 ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr); 1698 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1699 ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr); 1700 } else { 1701 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1702 Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)B->data; 1703 1704 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1705 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1706 } 1707 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 1708 PetscFunctionReturn(0); 1709 } 1710 1711 PetscErrorCode MatSetUp_MPISBAIJ(Mat A) 1712 { 1713 PetscErrorCode ierr; 1714 1715 PetscFunctionBegin; 1716 ierr = MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1717 PetscFunctionReturn(0); 1718 } 1719 1720 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1721 { 1722 PetscErrorCode ierr; 1723 Mat_MPISBAIJ *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data; 1724 PetscBLASInt bnz,one=1; 1725 Mat_SeqSBAIJ *xa,*ya; 1726 Mat_SeqBAIJ *xb,*yb; 1727 1728 PetscFunctionBegin; 1729 if (str == SAME_NONZERO_PATTERN) { 1730 PetscScalar alpha = a; 1731 xa = (Mat_SeqSBAIJ*)xx->A->data; 1732 ya = (Mat_SeqSBAIJ*)yy->A->data; 1733 ierr = 
PetscBLASIntCast(xa->nz,&bnz);CHKERRQ(ierr); 1734 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one)); 1735 xb = (Mat_SeqBAIJ*)xx->B->data; 1736 yb = (Mat_SeqBAIJ*)yy->B->data; 1737 ierr = PetscBLASIntCast(xb->nz,&bnz);CHKERRQ(ierr); 1738 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one)); 1739 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 1740 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1741 ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 1742 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1743 ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr); 1744 } else { 1745 Mat B; 1746 PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs; 1747 if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size"); 1748 ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr); 1749 ierr = MatGetRowUpperTriangular(Y);CHKERRQ(ierr); 1750 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 1751 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 1752 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 1753 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 1754 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 1755 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 1756 ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr); 1757 ierr = MatAXPYGetPreallocation_SeqSBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 1758 ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 1759 ierr = MatMPISBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 1760 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 1761 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 1762 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 1763 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 1764 ierr = 
MatRestoreRowUpperTriangular(X);CHKERRQ(ierr); 1765 ierr = MatRestoreRowUpperTriangular(Y);CHKERRQ(ierr); 1766 } 1767 PetscFunctionReturn(0); 1768 } 1769 1770 PetscErrorCode MatCreateSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[]) 1771 { 1772 PetscErrorCode ierr; 1773 PetscInt i; 1774 PetscBool flg; 1775 1776 PetscFunctionBegin; 1777 ierr = MatCreateSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr); /* B[] are sbaij matrices */ 1778 for (i=0; i<n; i++) { 1779 ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr); 1780 if (!flg) { 1781 ierr = MatSeqSBAIJZeroOps_Private(*B[i]);CHKERRQ(ierr); 1782 } 1783 } 1784 PetscFunctionReturn(0); 1785 } 1786 1787 PetscErrorCode MatShift_MPISBAIJ(Mat Y,PetscScalar a) 1788 { 1789 PetscErrorCode ierr; 1790 Mat_MPISBAIJ *maij = (Mat_MPISBAIJ*)Y->data; 1791 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ*)maij->A->data; 1792 1793 PetscFunctionBegin; 1794 if (!Y->preallocated) { 1795 ierr = MatMPISBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);CHKERRQ(ierr); 1796 } else if (!aij->nz) { 1797 PetscInt nonew = aij->nonew; 1798 ierr = MatSeqSBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL);CHKERRQ(ierr); 1799 aij->nonew = nonew; 1800 } 1801 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 1802 PetscFunctionReturn(0); 1803 } 1804 1805 PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A,PetscBool *missing,PetscInt *d) 1806 { 1807 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1808 PetscErrorCode ierr; 1809 1810 PetscFunctionBegin; 1811 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 1812 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 1813 if (d) { 1814 PetscInt rstart; 1815 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 1816 *d += rstart/A->rmap->bs; 1817 1818 } 1819 PetscFunctionReturn(0); 1820 } 1821 1822 PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a) 1823 { 1824 PetscFunctionBegin; 1825 *a = ((Mat_MPISBAIJ*)A->data)->A; 1826 
PetscFunctionReturn(0); 1827 } 1828 1829 /* -------------------------------------------------------------------*/ 1830 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ, 1831 MatGetRow_MPISBAIJ, 1832 MatRestoreRow_MPISBAIJ, 1833 MatMult_MPISBAIJ, 1834 /* 4*/ MatMultAdd_MPISBAIJ, 1835 MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */ 1836 MatMultAdd_MPISBAIJ, 1837 0, 1838 0, 1839 0, 1840 /* 10*/ 0, 1841 0, 1842 0, 1843 MatSOR_MPISBAIJ, 1844 MatTranspose_MPISBAIJ, 1845 /* 15*/ MatGetInfo_MPISBAIJ, 1846 MatEqual_MPISBAIJ, 1847 MatGetDiagonal_MPISBAIJ, 1848 MatDiagonalScale_MPISBAIJ, 1849 MatNorm_MPISBAIJ, 1850 /* 20*/ MatAssemblyBegin_MPISBAIJ, 1851 MatAssemblyEnd_MPISBAIJ, 1852 MatSetOption_MPISBAIJ, 1853 MatZeroEntries_MPISBAIJ, 1854 /* 24*/ 0, 1855 0, 1856 0, 1857 0, 1858 0, 1859 /* 29*/ MatSetUp_MPISBAIJ, 1860 0, 1861 0, 1862 MatGetDiagonalBlock_MPISBAIJ, 1863 0, 1864 /* 34*/ MatDuplicate_MPISBAIJ, 1865 0, 1866 0, 1867 0, 1868 0, 1869 /* 39*/ MatAXPY_MPISBAIJ, 1870 MatCreateSubMatrices_MPISBAIJ, 1871 MatIncreaseOverlap_MPISBAIJ, 1872 MatGetValues_MPISBAIJ, 1873 MatCopy_MPISBAIJ, 1874 /* 44*/ 0, 1875 MatScale_MPISBAIJ, 1876 MatShift_MPISBAIJ, 1877 0, 1878 0, 1879 /* 49*/ 0, 1880 0, 1881 0, 1882 0, 1883 0, 1884 /* 54*/ 0, 1885 0, 1886 MatSetUnfactored_MPISBAIJ, 1887 0, 1888 MatSetValuesBlocked_MPISBAIJ, 1889 /* 59*/ MatCreateSubMatrix_MPISBAIJ, 1890 0, 1891 0, 1892 0, 1893 0, 1894 /* 64*/ 0, 1895 0, 1896 0, 1897 0, 1898 0, 1899 /* 69*/ MatGetRowMaxAbs_MPISBAIJ, 1900 0, 1901 MatConvert_MPISBAIJ_Basic, 1902 0, 1903 0, 1904 /* 74*/ 0, 1905 0, 1906 0, 1907 0, 1908 0, 1909 /* 79*/ 0, 1910 0, 1911 0, 1912 0, 1913 MatLoad_MPISBAIJ, 1914 /* 84*/ 0, 1915 0, 1916 0, 1917 0, 1918 0, 1919 /* 89*/ 0, 1920 0, 1921 0, 1922 0, 1923 0, 1924 /* 94*/ 0, 1925 0, 1926 0, 1927 0, 1928 0, 1929 /* 99*/ 0, 1930 0, 1931 0, 1932 0, 1933 0, 1934 /*104*/ 0, 1935 MatRealPart_MPISBAIJ, 1936 MatImaginaryPart_MPISBAIJ, 1937 MatGetRowUpperTriangular_MPISBAIJ, 1938 
MatRestoreRowUpperTriangular_MPISBAIJ, 1939 /*109*/ 0, 1940 0, 1941 0, 1942 0, 1943 MatMissingDiagonal_MPISBAIJ, 1944 /*114*/ 0, 1945 0, 1946 0, 1947 0, 1948 0, 1949 /*119*/ 0, 1950 0, 1951 0, 1952 0, 1953 0, 1954 /*124*/ 0, 1955 0, 1956 0, 1957 0, 1958 0, 1959 /*129*/ 0, 1960 0, 1961 0, 1962 0, 1963 0, 1964 /*134*/ 0, 1965 0, 1966 0, 1967 0, 1968 0, 1969 /*139*/ MatSetBlockSizes_Default, 1970 0, 1971 0, 1972 0, 1973 0, 1974 /*144*/MatCreateMPIMatConcatenateSeqMat_MPISBAIJ 1975 }; 1976 1977 PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz) 1978 { 1979 Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)B->data; 1980 PetscErrorCode ierr; 1981 PetscInt i,mbs,Mbs; 1982 PetscMPIInt size; 1983 1984 PetscFunctionBegin; 1985 ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 1986 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 1987 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 1988 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 1989 if (B->rmap->N > B->cmap->N) SETERRQ2(PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"MPISBAIJ matrix cannot have more rows %D than columns %D",B->rmap->N,B->cmap->N); 1990 if (B->rmap->n > B->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"MPISBAIJ matrix cannot have more local rows %D than columns %D",B->rmap->n,B->cmap->n); 1991 1992 mbs = B->rmap->n/bs; 1993 Mbs = B->rmap->N/bs; 1994 if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->N,bs); 1995 1996 B->rmap->bs = bs; 1997 b->bs2 = bs*bs; 1998 b->mbs = mbs; 1999 b->Mbs = Mbs; 2000 b->nbs = B->cmap->n/bs; 2001 b->Nbs = B->cmap->N/bs; 2002 2003 for (i=0; i<=b->size; i++) { 2004 b->rangebs[i] = B->rmap->range[i]/bs; 2005 } 2006 b->rstartbs = B->rmap->rstart/bs; 2007 b->rendbs = B->rmap->rend/bs; 2008 2009 b->cstartbs = B->cmap->rstart/bs; 2010 b->cendbs = B->cmap->rend/bs; 2011 2012 #if defined(PETSC_USE_CTABLE) 2013 ierr 
= PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2014 #else 2015 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2016 #endif 2017 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2018 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2019 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2020 ierr = VecDestroy(&b->slvec0);CHKERRQ(ierr); 2021 ierr = VecDestroy(&b->slvec0b);CHKERRQ(ierr); 2022 ierr = VecDestroy(&b->slvec1);CHKERRQ(ierr); 2023 ierr = VecDestroy(&b->slvec1a);CHKERRQ(ierr); 2024 ierr = VecDestroy(&b->slvec1b);CHKERRQ(ierr); 2025 ierr = VecScatterDestroy(&b->sMvctx);CHKERRQ(ierr); 2026 2027 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2028 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 2029 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2030 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2031 ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr); 2032 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 2033 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2034 2035 if (!B->preallocated) { 2036 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2037 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2038 ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr); 2039 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2040 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr); 2041 } 2042 2043 ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2044 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 2045 2046 B->preallocated = PETSC_TRUE; 2047 B->was_assembled = PETSC_FALSE; 2048 B->assembled = PETSC_FALSE; 2049 PetscFunctionReturn(0); 2050 } 2051 2052 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 
2053 { 2054 PetscInt m,rstart,cend; 2055 PetscInt i,j,d,nz,bd, nz_max=0,*d_nnz=0,*o_nnz=0; 2056 const PetscInt *JJ =0; 2057 PetscScalar *values=0; 2058 PetscBool roworiented = ((Mat_MPISBAIJ*)B->data)->roworiented; 2059 PetscErrorCode ierr; 2060 PetscBool nooffprocentries; 2061 2062 PetscFunctionBegin; 2063 if (bs < 1) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 2064 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2065 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2066 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2067 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2068 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2069 m = B->rmap->n/bs; 2070 rstart = B->rmap->rstart/bs; 2071 cend = B->cmap->rend/bs; 2072 2073 if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 2074 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 2075 for (i=0; i<m; i++) { 2076 nz = ii[i+1] - ii[i]; 2077 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 2078 /* count the ones on the diagonal and above, split into diagonal and off diagonal portions. 
*/ 2079 JJ = jj + ii[i]; 2080 bd = 0; 2081 for (j=0; j<nz; j++) { 2082 if (*JJ >= i + rstart) break; 2083 JJ++; 2084 bd++; 2085 } 2086 d = 0; 2087 for (; j<nz; j++) { 2088 if (*JJ++ >= cend) break; 2089 d++; 2090 } 2091 d_nnz[i] = d; 2092 o_nnz[i] = nz - d - bd; 2093 nz = nz - bd; 2094 nz_max = PetscMax(nz_max,nz); 2095 } 2096 ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2097 ierr = MatSetOption(B,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 2098 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 2099 2100 values = (PetscScalar*)V; 2101 if (!values) { 2102 ierr = PetscCalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr); 2103 } 2104 for (i=0; i<m; i++) { 2105 PetscInt row = i + rstart; 2106 PetscInt ncols = ii[i+1] - ii[i]; 2107 const PetscInt *icols = jj + ii[i]; 2108 if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */ 2109 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2110 ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2111 } else { /* block ordering does not match so we can only insert one block at a time. */ 2112 PetscInt j; 2113 for (j=0; j<ncols; j++) { 2114 const PetscScalar *svals = values + (V ? 
(bs*bs*(ii[i]+j)) : 0); 2115 ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr); 2116 } 2117 } 2118 } 2119 2120 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2121 nooffprocentries = B->nooffprocentries; 2122 B->nooffprocentries = PETSC_TRUE; 2123 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2124 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2125 B->nooffprocentries = nooffprocentries; 2126 2127 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2128 PetscFunctionReturn(0); 2129 } 2130 2131 /*MC 2132 MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices, 2133 based on block compressed sparse row format. Only the upper triangular portion of the "diagonal" portion of 2134 the matrix is stored. 2135 2136 For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you 2137 can call MatSetOption(Mat, MAT_HERMITIAN); 2138 2139 Options Database Keys: 2140 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions() 2141 2142 Notes: 2143 The number of rows in the matrix must be less than or equal to the number of columns. Similarly the number of rows in the 2144 diagonal portion of the matrix of each process has to less than or equal the number of columns. 
2145 2146 Level: beginner 2147 2148 .seealso: MatCreateBAIJ(), MATSEQSBAIJ, MatType 2149 M*/ 2150 2151 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B) 2152 { 2153 Mat_MPISBAIJ *b; 2154 PetscErrorCode ierr; 2155 PetscBool flg = PETSC_FALSE; 2156 2157 PetscFunctionBegin; 2158 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 2159 B->data = (void*)b; 2160 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2161 2162 B->ops->destroy = MatDestroy_MPISBAIJ; 2163 B->ops->view = MatView_MPISBAIJ; 2164 B->assembled = PETSC_FALSE; 2165 B->insertmode = NOT_SET_VALUES; 2166 2167 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 2168 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr); 2169 2170 /* build local table of row and column ownerships */ 2171 ierr = PetscMalloc1(b->size+2,&b->rangebs);CHKERRQ(ierr); 2172 2173 /* build cache for off array entries formed */ 2174 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 2175 2176 b->donotstash = PETSC_FALSE; 2177 b->colmap = NULL; 2178 b->garray = NULL; 2179 b->roworiented = PETSC_TRUE; 2180 2181 /* stuff used in block assembly */ 2182 b->barray = 0; 2183 2184 /* stuff used for matrix vector multiply */ 2185 b->lvec = 0; 2186 b->Mvctx = 0; 2187 b->slvec0 = 0; 2188 b->slvec0b = 0; 2189 b->slvec1 = 0; 2190 b->slvec1a = 0; 2191 b->slvec1b = 0; 2192 b->sMvctx = 0; 2193 2194 /* stuff for MatGetRow() */ 2195 b->rowindices = 0; 2196 b->rowvalues = 0; 2197 b->getrowactive = PETSC_FALSE; 2198 2199 /* hash table stuff */ 2200 b->ht = 0; 2201 b->hd = 0; 2202 b->ht_size = 0; 2203 b->ht_flag = PETSC_FALSE; 2204 b->ht_fact = 0; 2205 b->ht_total_ct = 0; 2206 b->ht_insert_ct = 0; 2207 2208 /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */ 2209 b->ijonly = PETSC_FALSE; 2210 2211 b->in_loc = 0; 2212 b->v_loc = 0; 2213 b->n_loc = 0; 2214 2215 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ);CHKERRQ(ierr); 2216 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr); 2217 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr); 2218 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr); 2219 #if defined(PETSC_HAVE_ELEMENTAL) 2220 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_elemental_C",MatConvert_MPISBAIJ_Elemental);CHKERRQ(ierr); 2221 #endif 2222 #if defined(PETSC_HAVE_SCALAPACK) 2223 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_scalapack_C",MatConvert_SBAIJ_ScaLAPACK);CHKERRQ(ierr); 2224 #endif 2225 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpiaij_C",MatConvert_MPISBAIJ_Basic);CHKERRQ(ierr); 2226 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpibaij_C",MatConvert_MPISBAIJ_Basic);CHKERRQ(ierr); 2227 2228 B->symmetric = PETSC_TRUE; 2229 B->structurally_symmetric = PETSC_TRUE; 2230 B->symmetric_set = PETSC_TRUE; 2231 B->structurally_symmetric_set = PETSC_TRUE; 2232 B->symmetric_eternal = PETSC_TRUE; 2233 #if defined(PETSC_USE_COMPLEX) 2234 B->hermitian = PETSC_FALSE; 2235 B->hermitian_set = PETSC_FALSE; 2236 #else 2237 B->hermitian = PETSC_TRUE; 2238 B->hermitian_set = PETSC_TRUE; 2239 #endif 2240 2241 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr); 2242 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr); 2243 ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",flg,&flg,NULL);CHKERRQ(ierr); 2244 if (flg) { 2245 PetscReal fact = 1.39; 2246 ierr = 
MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 2247 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr); 2248 if (fact <= 1.0) fact = 1.39; 2249 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 2250 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 2251 } 2252 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2253 PetscFunctionReturn(0); 2254 } 2255 2256 /*MC 2257 MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices. 2258 2259 This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator, 2260 and MATMPISBAIJ otherwise. 2261 2262 Options Database Keys: 2263 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions() 2264 2265 Level: beginner 2266 2267 .seealso: MatCreateMPISBAIJ, MATSEQSBAIJ, MATMPISBAIJ 2268 M*/ 2269 2270 /*@C 2271 MatMPISBAIJSetPreallocation - For good matrix assembly performance 2272 the user should preallocate the matrix storage by setting the parameters 2273 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2274 performance can be increased by more than a factor of 50. 2275 2276 Collective on Mat 2277 2278 Input Parameters: 2279 + B - the matrix 2280 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 2281 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 2282 . d_nz - number of block nonzeros per block row in diagonal portion of local 2283 submatrix (same for all local rows) 2284 . d_nnz - array containing the number of block nonzeros in the various block rows 2285 in the upper triangular and diagonal part of the in diagonal portion of the local 2286 (possibly different for each block row) or NULL. 
If you plan to factor the matrix you must leave room 2287 for the diagonal entry and set a value even if it is zero. 2288 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2289 submatrix (same for all local rows). 2290 - o_nnz - array containing the number of nonzeros in the various block rows of the 2291 off-diagonal portion of the local submatrix that is right of the diagonal 2292 (possibly different for each block row) or NULL. 2293 2294 2295 Options Database Keys: 2296 + -mat_no_unroll - uses code that does not unroll the loops in the 2297 block calculations (much slower) 2298 - -mat_block_size - size of the blocks to use 2299 2300 Notes: 2301 2302 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2303 than it must be used on all processors that share the object for that argument. 2304 2305 If the *_nnz parameter is given then the *_nz parameter is ignored 2306 2307 Storage Information: 2308 For a square global matrix we define each processor's diagonal portion 2309 to be its local rows and the corresponding columns (a square submatrix); 2310 each processor's off-diagonal portion encompasses the remainder of the 2311 local matrix (a rectangular submatrix). 2312 2313 The user can specify preallocated storage for the diagonal part of 2314 the local submatrix with either d_nz or d_nnz (not both). Set 2315 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 2316 memory allocation. Likewise, specify preallocated storage for the 2317 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2318 2319 You can call MatGetInfo() to get information on how effective the preallocation was; 2320 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 2321 You can also run with the option -info and look for messages with the string 2322 malloc in them to see if additional memory allocation was needed. 
2323 2324 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2325 the figure below we depict these three local rows and all columns (0-11). 2326 2327 .vb 2328 0 1 2 3 4 5 6 7 8 9 10 11 2329 -------------------------- 2330 row 3 |. . . d d d o o o o o o 2331 row 4 |. . . d d d o o o o o o 2332 row 5 |. . . d d d o o o o o o 2333 -------------------------- 2334 .ve 2335 2336 Thus, any entries in the d locations are stored in the d (diagonal) 2337 submatrix, and any entries in the o locations are stored in the 2338 o (off-diagonal) submatrix. Note that the d matrix is stored in 2339 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 2340 2341 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 2342 plus the diagonal part of the d matrix, 2343 and o_nz should indicate the number of block nonzeros per row in the o matrix 2344 2345 In general, for PDE problems in which most nonzeros are near the diagonal, 2346 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2347 or you will get TERRIBLE performance; see the users' manual chapter on 2348 matrices. 2349 2350 Level: intermediate 2351 2352 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership() 2353 @*/ 2354 PetscErrorCode MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2355 { 2356 PetscErrorCode ierr; 2357 2358 PetscFunctionBegin; 2359 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2360 PetscValidType(B,1); 2361 PetscValidLogicalCollectiveInt(B,bs,2); 2362 ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 2363 PetscFunctionReturn(0); 2364 } 2365 2366 /*@C 2367 MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format 2368 (block compressed row). 
For good matrix assembly performance 2369 the user should preallocate the matrix storage by setting the parameters 2370 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2371 performance can be increased by more than a factor of 50. 2372 2373 Collective 2374 2375 Input Parameters: 2376 + comm - MPI communicator 2377 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 2378 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 2379 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2380 This value should be the same as the local size used in creating the 2381 y vector for the matrix-vector product y = Ax. 2382 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 2383 This value should be the same as the local size used in creating the 2384 x vector for the matrix-vector product y = Ax. 2385 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2386 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2387 . d_nz - number of block nonzeros per block row in diagonal portion of local 2388 submatrix (same for all local rows) 2389 . d_nnz - array containing the number of block nonzeros in the various block rows 2390 in the upper triangular portion of the in diagonal portion of the local 2391 (possibly different for each block block row) or NULL. 2392 If you plan to factor the matrix you must leave room for the diagonal entry and 2393 set its value even if it is zero. 2394 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2395 submatrix (same for all local rows). 
2396 - o_nnz - array containing the number of nonzeros in the various block rows of the 2397 off-diagonal portion of the local submatrix (possibly different for 2398 each block row) or NULL. 2399 2400 Output Parameter: 2401 . A - the matrix 2402 2403 Options Database Keys: 2404 + -mat_no_unroll - uses code that does not unroll the loops in the 2405 block calculations (much slower) 2406 . -mat_block_size - size of the blocks to use 2407 - -mat_mpi - use the parallel matrix data structures even on one processor 2408 (defaults to using SeqBAIJ format on one processor) 2409 2410 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 2411 MatXXXXSetPreallocation() paradigm instead of this routine directly. 2412 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 2413 2414 Notes: 2415 The number of rows and columns must be divisible by blocksize. 2416 This matrix type does not support complex Hermitian operation. 2417 2418 The user MUST specify either the local or global matrix dimensions 2419 (possibly both). 2420 2421 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2422 than it must be used on all processors that share the object for that argument. 2423 2424 If the *_nnz parameter is given then the *_nz parameter is ignored 2425 2426 Storage Information: 2427 For a square global matrix we define each processor's diagonal portion 2428 to be its local rows and the corresponding columns (a square submatrix); 2429 each processor's off-diagonal portion encompasses the remainder of the 2430 local matrix (a rectangular submatrix). 2431 2432 The user can specify preallocated storage for the diagonal part of 2433 the local submatrix with either d_nz or d_nnz (not both). Set 2434 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 2435 memory allocation. Likewise, specify preallocated storage for the 2436 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 
2437 2438 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2439 the figure below we depict these three local rows and all columns (0-11). 2440 2441 .vb 2442 0 1 2 3 4 5 6 7 8 9 10 11 2443 -------------------------- 2444 row 3 |. . . d d d o o o o o o 2445 row 4 |. . . d d d o o o o o o 2446 row 5 |. . . d d d o o o o o o 2447 -------------------------- 2448 .ve 2449 2450 Thus, any entries in the d locations are stored in the d (diagonal) 2451 submatrix, and any entries in the o locations are stored in the 2452 o (off-diagonal) submatrix. Note that the d matrix is stored in 2453 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 2454 2455 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 2456 plus the diagonal part of the d matrix, 2457 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2458 In general, for PDE problems in which most nonzeros are near the diagonal, 2459 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2460 or you will get TERRIBLE performance; see the users' manual chapter on 2461 matrices. 
   Level: intermediate

.seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ()
@*/

PetscErrorCode MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  /* dispatch on communicator size: a one-process communicator gets the
     sequential SBAIJ type (only d_nz/d_nnz apply), otherwise the MPI type */
  if (size > 1) {
    ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
    ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* Duplicate an MPISBAIJ matrix: creates a new Mat with the same layout/type,
   copies the scalar bookkeeping fields of Mat_MPISBAIJ, rebuilds the work
   vectors and scatters, and duplicates the diagonal (A) and off-diagonal (B)
   sequential blocks.  Values are copied or not according to cpvalues. */
static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
  PetscErrorCode ierr;
  PetscInt       len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs;
  PetscScalar    *array;

  PetscFunctionBegin;
  *newmat = 0;

  ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
  ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);

  /* the duplicate is marked assembled/preallocated up front since all data
     structures below are copied in a consistent, assembled state */
  mat->factortype   = matin->factortype;
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;

  a      = (Mat_MPISBAIJ*)mat->data;
  a->bs2 = oldmat->bs2;
  a->mbs = oldmat->mbs;
  a->nbs = oldmat->nbs;
  a->Mbs = oldmat->Mbs;
  a->Nbs = oldmat->Nbs;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  /* per-object scratch state is reset, not copied */
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;
  a->barray       = 0;
  a->rstartbs     = oldmat->rstartbs;
  a->rendbs       = oldmat->rendbs;
  a->cstartbs     = oldmat->cstartbs;
  a->cendbs       = oldmat->cendbs;

  /* hash table stuff: the table itself is not copied, only the tuning flags */
  a->ht           = 0;
  a->hd           = 0;
  a->ht_size      = 0;
  a->ht_flag      = oldmat->ht_flag;
  a->ht_fact      = oldmat->ht_fact;
  a->ht_total_ct  = 0;
  a->ht_insert_ct = 0;

  ierr = PetscArraycpy(a->rangebs,oldmat->rangebs,a->size+2);CHKERRQ(ierr);
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscArraycpy(a->colmap,oldmat->colmap,a->Nbs);CHKERRQ(ierr);
#endif
  } else a->colmap = 0;

  /* garray maps local off-diagonal block columns to global block columns;
     its length is the number of block columns of B */
  if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
    ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr);
  } else a->garray = 0;

  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);

  ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
  ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);

  /* slvec1a/slvec1b (and slvec0b) are views aliasing the storage of
     slvec1/slvec0: the first bs*mbs entries form the "a" part, the rest the
     "b" part.  They must be rebuilt on the freshly duplicated vectors. */
  ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
  ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
  ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b);CHKERRQ(ierr);

  /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
  /* the special scatter is shared (reference counted) rather than copied */
  ierr      = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
  a->sMvctx = oldmat->sMvctx;
  ierr      = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx);CHKERRQ(ierr);

  ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
  ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}

/* Used for both
MPIBAIJ and MPISBAIJ matrices */
#define MatLoad_MPISBAIJ_Binary MatLoad_MPIBAIJ_Binary

/* Load an MPISBAIJ matrix from a viewer; only binary viewers are supported
   (the actual work is shared with the BAIJ loader via the #define above). */
PetscErrorCode MatLoad_MPISBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      isbinary;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)mat)->type_name);
  ierr = MatLoad_MPISBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*XXXXX@
   MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.

   Input Parameters:
.  mat  - the matrix
.  fact - factor

   Not Collective on Mat, each process can have a different hash factor

   Level: advanced

  Notes:
   This can also be set by the command line option: -mat_use_hash_table fact

.seealso: MatSetOption()
@XXXXX*/

/* Compute per-row maximum absolute values of an MPISBAIJ matrix.  Because
   only the upper triangle is stored, each off-diagonal entry of B
   contributes to both its row (local) and its column (owned by a later
   rank); the column contributions are accumulated in `work` and forwarded
   to the owning ranks with point-to-point MPI messages. */
PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
  PetscReal      atmp;
  PetscReal      *work,*svalues,*rvalues;
  PetscErrorCode ierr;
  PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
  PetscMPIInt    rank,size;
  PetscInt       *rowners_bs,dest,count,source;
  PetscScalar    *va;
  MatScalar      *ba;
  MPI_Status     stat;

  PetscFunctionBegin;
  if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
  /* diagonal block handled by the sequential implementation */
  ierr = MatGetRowMaxAbs(a->A,v,NULL);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);

  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);

  bs  = A->rmap->bs;
  mbs = a->mbs;
  Mbs = a->Mbs;
  ba  = b->a;
  bi  = b->i;
  bj  = b->j;

  /* find ownerships */
  rowners_bs = A->rmap->range;

  /* each proc creates an array to be distributed */
  ierr = PetscCalloc1(bs*Mbs,&work);CHKERRQ(ierr);

  /* row_max for B (the last rank has an empty B, so it only receives) */
  if (rank != size-1) {
    for (i=0; i<mbs; i++) {
      ncols = bi[1] - bi[0]; bi++;
      brow  = bs*i;
      for (j=0; j<ncols; j++) {
        bcol = bs*(*bj);
        for (kcol=0; kcol<bs; kcol++) {
          col  = bcol + kcol;        /* local col index */
          col += rowners_bs[rank+1]; /* global col index */
          for (krow=0; krow<bs; krow++) {
            atmp = PetscAbsScalar(*ba); ba++;
            row  = brow + krow;      /* local row index */
            if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
            if (work[col] < atmp) work[col] = atmp;
          }
        }
        bj++;
      }
    }

    /* send values to its owners */
    for (dest=rank+1; dest<size; dest++) {
      svalues = work + rowners_bs[dest];
      count   = rowners_bs[dest+1]-rowners_bs[dest];
      ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
    }
  }

  /* receive values: one message from each lower rank, merged by max */
  if (rank) {
    rvalues = work;
    count   = rowners_bs[rank+1]-rowners_bs[rank];
    for (source=0; source<rank; source++) {
      ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat);CHKERRQ(ierr);
      /* process values */
      for (i=0; i<count; i++) {
        if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
      }
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = PetscFree(work);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* SOR/SSOR relaxation for MPISBAIJ using the combined slvec0/slvec1 work
   vectors and the single scatter sMvctx (communication is overlapped with
   the local array copies).  Only block size 1 is supported. */
PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)matin->data;
  PetscErrorCode    ierr;
  PetscInt          mbs=mat->mbs,bs=matin->rmap->bs;
  PetscScalar       *x,*ptr,*from;
  Vec               bb1;
  const PetscScalar *b;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
  if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");

  if (flag == SOR_APPLY_UPPER) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
    while (its--) {

      /* lower triangular part: slvec0b = - B^T*xx */
      ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);

      /* copy xx into slvec0a (slvec0a aliases the first bs*mbs entries of slvec0) */
      ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
      ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
      ierr = PetscArraycpy(ptr,x,bs*mbs);CHKERRQ(ierr);
      ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);

      ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);

      /* copy bb into slvec1a */
      ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
      ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = PetscArraycpy(ptr,b,bs*mbs);CHKERRQ(ierr);
      ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);

      /* set slvec1b = 0 */
      ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);

      /* the array restores are deliberately placed between scatter begin and
         end so communication overlaps local work */
      ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
      ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* upper triangular part: bb1 = bb1 - B*x */
      ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);

      /* local diagonal sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
    ierr = VecDestroy(&bb1);CHKERRQ(ierr);
  } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
  } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
  } else if (flag & SOR_EISENSTAT) {
    Vec               xx1;
    PetscBool         hasop;
    const PetscScalar *diag;
    PetscScalar       *sl,scale = (omega - 2.0)/omega;
    PetscInt          i,n;

    /* lazily created work vectors, cached on the matrix */
    if (!mat->xx1) {
      ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr);
      ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr);
    }
    xx1 = mat->xx1;
    bb1 = mat->bb1;

    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);

    if (!mat->diag) {
      /* this is wrong for same matrix with new nonzero values */
      ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
      ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
    }
    ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);

    if (hasop) {
      ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
      ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
    } else {
      /*
        These two lines are replaced by code that may be a bit faster for a good compiler
        ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr);
        ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
      */
      ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr);
      ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr);
      ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
      ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr);
      if (omega == 1.0) {
        for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i];
        ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr);
      } else {
        /* scale = (omega-2)/omega, so this is the general-omega form */
        for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i];
        ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr);
      }
      ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr);
      ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr);
      ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
    }

    /* multiply off-diagonal portion of matrix */
    ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
    ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
    ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr);
    ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
    ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr);
    ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr);
    ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
    ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr);

    /* local sweep */
    ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
    ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
  PetscFunctionReturn(0);
}

/* Variant of MatSOR_MPISBAIJ that uses the two-scatter (Mvctx) scheme
   instead of the combined sMvctx scatter.  Only block size 1 is supported. */
PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            lvec1,bb1;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
  if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* lower diagonal part: bb1 = bb - B^T*xx */
      ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
      ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);

      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
      ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

      /* upper diagonal part: bb1 = bb1 - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);

      ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

      /* diagonal sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
    ierr = VecDestroy(&lvec1);CHKERRQ(ierr);
    ierr = VecDestroy(&bb1);CHKERRQ(ierr);
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
  PetscFunctionReturn(0);
}

/*@
   MatCreateMPISBAIJWithArrays - creates a MPI SBAIJ matrix using arrays that contain in standard
   CSR format the local
rows. 2889 2890 Collective 2891 2892 Input Parameters: 2893 + comm - MPI communicator 2894 . bs - the block size, only a block size of 1 is supported 2895 . m - number of local rows (Cannot be PETSC_DECIDE) 2896 . n - This value should be the same as the local size used in creating the 2897 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 2898 calculated if N is given) For square matrices n is almost always m. 2899 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2900 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2901 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that row block row of the matrix 2902 . j - column indices 2903 - a - matrix values 2904 2905 Output Parameter: 2906 . mat - the matrix 2907 2908 Level: intermediate 2909 2910 Notes: 2911 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 2912 thus you CANNOT change the matrix entries by changing the values of a[] after you have 2913 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 2914 2915 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 
2916 2917 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 2918 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 2919 @*/ 2920 PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 2921 { 2922 PetscErrorCode ierr; 2923 2924 2925 PetscFunctionBegin; 2926 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 2927 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 2928 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 2929 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 2930 ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr); 2931 ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr); 2932 PetscFunctionReturn(0); 2933 } 2934 2935 2936 /*@C 2937 MatMPISBAIJSetPreallocationCSR - Creates a sparse parallel matrix in SBAIJ format using the given nonzero structure and (optional) numerical values 2938 2939 Collective 2940 2941 Input Parameters: 2942 + B - the matrix 2943 . bs - the block size 2944 . i - the indices into j for the start of each local row (starts with zero) 2945 . 
j - the column indices for each local row (starts with zero) these must be sorted for each row 2946 - v - optional values in the matrix 2947 2948 Level: advanced 2949 2950 Notes: 2951 Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries 2952 and usually the numerical values as well 2953 2954 Any entries below the diagonal are ignored 2955 2956 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ 2957 @*/ 2958 PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2959 { 2960 PetscErrorCode ierr; 2961 2962 PetscFunctionBegin; 2963 ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 2964 PetscFunctionReturn(0); 2965 } 2966 2967 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 2968 { 2969 PetscErrorCode ierr; 2970 PetscInt m,N,i,rstart,nnz,Ii,bs,cbs; 2971 PetscInt *indx; 2972 PetscScalar *values; 2973 2974 PetscFunctionBegin; 2975 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 2976 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 2977 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inmat->data; 2978 PetscInt *dnz,*onz,mbs,Nbs,nbs; 2979 PetscInt *bindx,rmax=a->rmax,j; 2980 PetscMPIInt rank,size; 2981 2982 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 2983 mbs = m/bs; Nbs = N/cbs; 2984 if (n == PETSC_DECIDE) { 2985 ierr = PetscSplitOwnershipBlock(comm,cbs,&n,&N); 2986 } 2987 nbs = n/cbs; 2988 2989 ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr); 2990 ierr = MatPreallocateInitialize(comm,mbs,nbs,dnz,onz);CHKERRQ(ierr); /* inline function, output __end and __rstart are used below */ 2991 2992 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2993 ierr = MPI_Comm_rank(comm,&size);CHKERRQ(ierr); 2994 if (rank == 
size-1) { 2995 /* Check sum(nbs) = Nbs */ 2996 if (__end != Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local block columns %D != global block columns %D",__end,Nbs); 2997 } 2998 2999 rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateInitialize */ 3000 ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 3001 for (i=0; i<mbs; i++) { 3002 ierr = MatGetRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */ 3003 nnz = nnz/bs; 3004 for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs; 3005 ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr); 3006 ierr = MatRestoreRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); 3007 } 3008 ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr); 3009 ierr = PetscFree(bindx);CHKERRQ(ierr); 3010 3011 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3012 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3013 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 3014 ierr = MatSetType(*outmat,MATSBAIJ);CHKERRQ(ierr); 3015 ierr = MatSeqSBAIJSetPreallocation(*outmat,bs,0,dnz);CHKERRQ(ierr); 3016 ierr = MatMPISBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr); 3017 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3018 } 3019 3020 /* numeric phase */ 3021 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 3022 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 3023 3024 ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 3025 for (i=0; i<m; i++) { 3026 ierr = MatGetRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3027 Ii = i + rstart; 3028 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3029 ierr = MatRestoreRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3030 } 3031 ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr); 3032 ierr = 
MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3033 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3034 PetscFunctionReturn(0); 3035 } 3036