
#include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/
#include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
#include <../src/mat/impls/sbaij/seq/sbaij.h>
#include <petscblaslapack.h>

#if defined(PETSC_HAVE_ELEMENTAL)
PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
#endif

/* This could be moved to matimpl.h */
/*
   MatPreallocateWithMats_Private - Sets the preallocation of B from the union of the
   nonzero patterns of the nm matrices X[].

   Input Parameters:
+  B    - the matrix to preallocate (must have the same row layout as every X[i])
.  nm   - number of matrices in X[]
.  X    - array of matrices whose nonzero patterns are merged
.  symm - if symm[i] is PETSC_TRUE, the transpose of X[i]'s pattern is merged in as
          well (used when X[i] stores only one triangle of a symmetric matrix)
-  fill - passed to MatPreallocatorPreallocate(); if PETSC_TRUE the preallocator
          also fills B with zeros so the pattern is instantiated

   Works by inserting every entry of each X[i] (and its transpose when requested)
   into a temporary MATPREALLOCATOR matrix, which records counts only, then using
   that to preallocate B.
*/
static PetscErrorCode MatPreallocateWithMats_Private(Mat B, PetscInt nm, Mat X[], PetscBool symm[], PetscBool fill)
{
  Mat            preallocator;
  PetscInt       r,rstart,rend;
  PetscInt       bs,i,m,n,M,N;
  PetscBool      cong = PETSC_TRUE;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidLogicalCollectiveInt(B,nm,2);
  for (i = 0; i < nm; i++) {
    PetscValidHeaderSpecific(X[i],MAT_CLASSID,3);
    /* all X[i] must share B's row layout or the row loop below would be wrong */
    ierr = PetscLayoutCompare(B->rmap,X[i]->rmap,&cong);CHKERRQ(ierr);
    if (!cong) SETERRQ(PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"Not for different layouts");
  }
  PetscValidLogicalCollectiveBool(B,fill,5);
  ierr = MatGetBlockSize(B,&bs);CHKERRQ(ierr);
  ierr = MatGetSize(B,&M,&N);CHKERRQ(ierr);
  ierr = MatGetLocalSize(B,&m,&n);CHKERRQ(ierr);
  /* the preallocator mirrors B's sizes/block size and only counts insertions */
  ierr = MatCreate(PetscObjectComm((PetscObject)B),&preallocator);CHKERRQ(ierr);
  ierr = MatSetType(preallocator,MATPREALLOCATOR);CHKERRQ(ierr);
  ierr = MatSetBlockSize(preallocator,bs);CHKERRQ(ierr);
  ierr = MatSetSizes(preallocator,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetUp(preallocator);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(preallocator,&rstart,&rend);CHKERRQ(ierr);
  for (r = rstart; r < rend; ++r) {
    PetscInt          ncols;
    const PetscInt    *row;
    const PetscScalar *vals;

    for (i = 0; i < nm; i++) {
      ierr = MatGetRow(X[i],r,&ncols,&row,&vals);CHKERRQ(ierr);
      ierr = MatSetValues(preallocator,1,&r,ncols,row,vals,INSERT_VALUES);CHKERRQ(ierr);
      if (symm && symm[i]) {
        /* also record the transposed entries so both triangles are preallocated */
        ierr = MatSetValues(preallocator,ncols,row,1,&r,vals,INSERT_VALUES);CHKERRQ(ierr);
      }
      ierr = 
MatRestoreRow(X[i],r,&ncols,&row,&vals);CHKERRQ(ierr); 50 } 51 } 52 ierr = MatAssemblyBegin(preallocator,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 53 ierr = MatAssemblyEnd(preallocator,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 54 ierr = MatPreallocatorPreallocate(preallocator,fill,B);CHKERRQ(ierr); 55 ierr = MatDestroy(&preallocator);CHKERRQ(ierr); 56 PetscFunctionReturn(0); 57 } 58 59 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Basic(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 60 { 61 Mat B; 62 PetscErrorCode ierr; 63 PetscInt r; 64 65 PetscFunctionBegin; 66 if (reuse != MAT_REUSE_MATRIX) { 67 PetscBool symm = PETSC_TRUE,isdense; 68 PetscInt bs; 69 70 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 71 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 72 ierr = MatSetType(B,newtype);CHKERRQ(ierr); 73 ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr); 74 ierr = MatSetBlockSize(B,bs);CHKERRQ(ierr); 75 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 76 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 77 ierr = PetscObjectTypeCompareAny((PetscObject)B,&isdense,MATSEQDENSE,MATMPIDENSE,MATSEQDENSECUDA,"");CHKERRQ(ierr); 78 if (!isdense) { 79 ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr); 80 ierr = MatPreallocateWithMats_Private(B,1,&A,&symm,PETSC_TRUE);CHKERRQ(ierr); 81 ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr); 82 } else { 83 ierr = MatSetUp(B);CHKERRQ(ierr); 84 } 85 } else { 86 B = *newmat; 87 ierr = MatZeroEntries(B);CHKERRQ(ierr); 88 } 89 90 ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr); 91 for (r = A->rmap->rstart; r < A->rmap->rend; r++) { 92 PetscInt ncols; 93 const PetscInt *row; 94 const PetscScalar *vals; 95 96 ierr = MatGetRow(A,r,&ncols,&row,&vals);CHKERRQ(ierr); 97 ierr = MatSetValues(B,1,&r,ncols,row,vals,INSERT_VALUES);CHKERRQ(ierr); 98 #if defined(PETSC_USE_COMPLEX) 99 if (A->hermitian) { 100 PetscInt i; 101 for (i = 0; i < ncols; i++) { 102 ierr = 
MatSetValue(B,row[i],r,PetscConj(vals[i]),INSERT_VALUES);CHKERRQ(ierr);
      }
    } else {
      ierr = MatSetValues(B,ncols,row,1,&r,vals,INSERT_VALUES);CHKERRQ(ierr);
    }
#else
    ierr = MatSetValues(B,ncols,row,1,&r,vals,INSERT_VALUES);CHKERRQ(ierr);
#endif
    ierr = MatRestoreRow(A,r,&ncols,&row,&vals);CHKERRQ(ierr);
  }
  ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  if (reuse == MAT_INPLACE_MATRIX) {
    ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr);
  } else {
    *newmat = B;
  }
  PetscFunctionReturn(0);
}

/*
   MatStoreValues_MPISBAIJ - Stashes a copy of the current numerical values of the
   matrix so they can later be restored with MatRetrieveValues_MPISBAIJ().
   Simply delegates to the sequential diagonal (A) and off-diagonal (B) parts.
*/
PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatRetrieveValues_MPISBAIJ - Restores the numerical values previously saved with
   MatStoreValues_MPISBAIJ(); delegates to the diagonal (A) and off-diagonal (B) parts.
*/
PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatSetValues_SeqSBAIJ_A_Private - Inserts/adds the scalar `value` at scalar
   position (row,col) of the local diagonal block A (SeqSBAIJ storage).
   (row,col) are local scalar indices; (orow,ocol) are the global indices used
   only in the error message.  Relies on many variables (a, aa, ai, aj, aimax,
   ailen, bs, bs2, rp, ap, bap, brow, bcol, ridx, cidx, low, high, t, _i, nrow,
   rmax, N, ierr) being in scope at the expansion site.
   Searches the row (bisection narrowing followed by a linear scan) for the block
   column; if absent and new nonzeros are allowed (a->nonew), shifts later
   entries up and inserts a zeroed bs x bs block before writing the value.
*/
#define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,orow,ocol) \
  { \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else                    *bap  = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    if
(a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 170 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 171 N = nrow++ - 1; \ 172 /* shift up all the later entries in this row */ \ 173 ierr = PetscArraymove(rp+_i+1,rp+_i,N-_i+1);CHKERRQ(ierr); \ 174 ierr = PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1));CHKERRQ(ierr); \ 175 ierr = PetscArrayzero(ap+bs2*_i,bs2);CHKERRQ(ierr); \ 176 rp[_i] = bcol; \ 177 ap[bs2*_i + bs*cidx + ridx] = value; \ 178 A->nonzerostate++;\ 179 a_noinsert:; \ 180 ailen[brow] = nrow; \ 181 } 182 183 #define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,orow,ocol) \ 184 { \ 185 brow = row/bs; \ 186 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 187 rmax = bimax[brow]; nrow = bilen[brow]; \ 188 bcol = col/bs; \ 189 ridx = row % bs; cidx = col % bs; \ 190 low = 0; high = nrow; \ 191 while (high-low > 3) { \ 192 t = (low+high)/2; \ 193 if (rp[t] > bcol) high = t; \ 194 else low = t; \ 195 } \ 196 for (_i=low; _i<high; _i++) { \ 197 if (rp[_i] > bcol) break; \ 198 if (rp[_i] == bcol) { \ 199 bap = ap + bs2*_i + bs*cidx + ridx; \ 200 if (addv == ADD_VALUES) *bap += value; \ 201 else *bap = value; \ 202 goto b_noinsert; \ 203 } \ 204 } \ 205 if (b->nonew == 1) goto b_noinsert; \ 206 if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 207 MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 208 N = nrow++ - 1; \ 209 /* shift up all the later entries in this row */ \ 210 ierr = PetscArraymove(rp+_i+1,rp+_i,N-_i+1);CHKERRQ(ierr); \ 211 ierr = PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1));CHKERRQ(ierr); \ 212 ierr = PetscArrayzero(ap+bs2*_i,bs2);CHKERRQ(ierr); \ 213 rp[_i] = bcol; \ 214 ap[bs2*_i + bs*cidx + ridx] = value; \ 215 
B->nonzerostate++;\ 216 b_noinsert:; \ 217 bilen[brow] = nrow; \ 218 } 219 220 /* Only add/insert a(i,j) with i<=j (blocks). 221 Any a(i,j) with i>j input by user is ingored or generates an error 222 */ 223 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 224 { 225 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 226 MatScalar value; 227 PetscBool roworiented = baij->roworiented; 228 PetscErrorCode ierr; 229 PetscInt i,j,row,col; 230 PetscInt rstart_orig=mat->rmap->rstart; 231 PetscInt rend_orig =mat->rmap->rend,cstart_orig=mat->cmap->rstart; 232 PetscInt cend_orig =mat->cmap->rend,bs=mat->rmap->bs; 233 234 /* Some Variables required in the macro */ 235 Mat A = baij->A; 236 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data; 237 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 238 MatScalar *aa =a->a; 239 240 Mat B = baij->B; 241 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 242 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 243 MatScalar *ba =b->a; 244 245 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 246 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 247 MatScalar *ap,*bap; 248 249 /* for stash */ 250 PetscInt n_loc, *in_loc = NULL; 251 MatScalar *v_loc = NULL; 252 253 PetscFunctionBegin; 254 if (!baij->donotstash) { 255 if (n > baij->n_loc) { 256 ierr = PetscFree(baij->in_loc);CHKERRQ(ierr); 257 ierr = PetscFree(baij->v_loc);CHKERRQ(ierr); 258 ierr = PetscMalloc1(n,&baij->in_loc);CHKERRQ(ierr); 259 ierr = PetscMalloc1(n,&baij->v_loc);CHKERRQ(ierr); 260 261 baij->n_loc = n; 262 } 263 in_loc = baij->in_loc; 264 v_loc = baij->v_loc; 265 } 266 267 for (i=0; i<m; i++) { 268 if (im[i] < 0) continue; 269 #if defined(PETSC_USE_DEBUG) 270 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 271 #endif 272 if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */ 273 row = im[i] - 
rstart_orig; /* local row index */ 274 for (j=0; j<n; j++) { 275 if (im[i]/bs > in[j]/bs) { 276 if (a->ignore_ltriangular) { 277 continue; /* ignore lower triangular blocks */ 278 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 279 } 280 if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */ 281 col = in[j] - cstart_orig; /* local col index */ 282 brow = row/bs; bcol = col/bs; 283 if (brow > bcol) continue; /* ignore lower triangular blocks of A */ 284 if (roworiented) value = v[i*n+j]; 285 else value = v[i+j*m]; 286 MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,im[i],in[j]); 287 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 288 } else if (in[j] < 0) continue; 289 #if defined(PETSC_USE_DEBUG) 290 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 291 #endif 292 else { /* off-diag entry (B) */ 293 if (mat->was_assembled) { 294 if (!baij->colmap) { 295 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 296 } 297 #if defined(PETSC_USE_CTABLE) 298 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 299 col = col - 1; 300 #else 301 col = baij->colmap[in[j]/bs] - 1; 302 #endif 303 if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) { 304 ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 305 col = in[j]; 306 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 307 B = baij->B; 308 b = (Mat_SeqBAIJ*)(B)->data; 309 bimax= b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 310 ba = b->a; 311 } else col += in[j]%bs; 312 } else col = in[j]; 313 if (roworiented) value = v[i*n+j]; 314 else value = v[i+j*m]; 315 MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,im[i],in[j]); 316 /* ierr = 
MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 317 } 318 } 319 } else { /* off processor entry */ 320 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 321 if (!baij->donotstash) { 322 mat->assembled = PETSC_FALSE; 323 n_loc = 0; 324 for (j=0; j<n; j++) { 325 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 326 in_loc[n_loc] = in[j]; 327 if (roworiented) { 328 v_loc[n_loc] = v[i*n+j]; 329 } else { 330 v_loc[n_loc] = v[j*m+i]; 331 } 332 n_loc++; 333 } 334 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr); 335 } 336 } 337 } 338 PetscFunctionReturn(0); 339 } 340 341 PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol) 342 { 343 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 344 PetscErrorCode ierr; 345 PetscInt *rp,low,high,t,ii,jj,nrow,i,rmax,N; 346 PetscInt *imax =a->imax,*ai=a->i,*ailen=a->ilen; 347 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs; 348 PetscBool roworiented=a->roworiented; 349 const PetscScalar *value = v; 350 MatScalar *ap,*aa = a->a,*bap; 351 352 PetscFunctionBegin; 353 if (col < row) { 354 if (a->ignore_ltriangular) PetscFunctionReturn(0); /* ignore lower triangular block */ 355 else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. 
Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 356 } 357 rp = aj + ai[row]; 358 ap = aa + bs2*ai[row]; 359 rmax = imax[row]; 360 nrow = ailen[row]; 361 value = v; 362 low = 0; 363 high = nrow; 364 365 while (high-low > 7) { 366 t = (low+high)/2; 367 if (rp[t] > col) high = t; 368 else low = t; 369 } 370 for (i=low; i<high; i++) { 371 if (rp[i] > col) break; 372 if (rp[i] == col) { 373 bap = ap + bs2*i; 374 if (roworiented) { 375 if (is == ADD_VALUES) { 376 for (ii=0; ii<bs; ii++) { 377 for (jj=ii; jj<bs2; jj+=bs) { 378 bap[jj] += *value++; 379 } 380 } 381 } else { 382 for (ii=0; ii<bs; ii++) { 383 for (jj=ii; jj<bs2; jj+=bs) { 384 bap[jj] = *value++; 385 } 386 } 387 } 388 } else { 389 if (is == ADD_VALUES) { 390 for (ii=0; ii<bs; ii++) { 391 for (jj=0; jj<bs; jj++) { 392 *bap++ += *value++; 393 } 394 } 395 } else { 396 for (ii=0; ii<bs; ii++) { 397 for (jj=0; jj<bs; jj++) { 398 *bap++ = *value++; 399 } 400 } 401 } 402 } 403 goto noinsert2; 404 } 405 } 406 if (nonew == 1) goto noinsert2; 407 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new block index nonzero block (%D, %D) in the matrix", orow, ocol); 408 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 409 N = nrow++ - 1; high++; 410 /* shift up all the later entries in this row */ 411 ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr); 412 ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr); 413 rp[i] = col; 414 bap = ap + bs2*i; 415 if (roworiented) { 416 for (ii=0; ii<bs; ii++) { 417 for (jj=ii; jj<bs2; jj+=bs) { 418 bap[jj] = *value++; 419 } 420 } 421 } else { 422 for (ii=0; ii<bs; ii++) { 423 for (jj=0; jj<bs; jj++) { 424 *bap++ = *value++; 425 } 426 } 427 } 428 noinsert2:; 429 ailen[row] = nrow; 430 PetscFunctionReturn(0); 431 } 432 433 /* 434 This routine is exactly duplicated in mpibaij.c 435 */ 436 PETSC_STATIC_INLINE 
PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol) 437 { 438 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 439 PetscInt *rp,low,high,t,ii,jj,nrow,i,rmax,N; 440 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 441 PetscErrorCode ierr; 442 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs; 443 PetscBool roworiented=a->roworiented; 444 const PetscScalar *value = v; 445 MatScalar *ap,*aa = a->a,*bap; 446 447 PetscFunctionBegin; 448 rp = aj + ai[row]; 449 ap = aa + bs2*ai[row]; 450 rmax = imax[row]; 451 nrow = ailen[row]; 452 low = 0; 453 high = nrow; 454 value = v; 455 while (high-low > 7) { 456 t = (low+high)/2; 457 if (rp[t] > col) high = t; 458 else low = t; 459 } 460 for (i=low; i<high; i++) { 461 if (rp[i] > col) break; 462 if (rp[i] == col) { 463 bap = ap + bs2*i; 464 if (roworiented) { 465 if (is == ADD_VALUES) { 466 for (ii=0; ii<bs; ii++) { 467 for (jj=ii; jj<bs2; jj+=bs) { 468 bap[jj] += *value++; 469 } 470 } 471 } else { 472 for (ii=0; ii<bs; ii++) { 473 for (jj=ii; jj<bs2; jj+=bs) { 474 bap[jj] = *value++; 475 } 476 } 477 } 478 } else { 479 if (is == ADD_VALUES) { 480 for (ii=0; ii<bs; ii++,value+=bs) { 481 for (jj=0; jj<bs; jj++) { 482 bap[jj] += value[jj]; 483 } 484 bap += bs; 485 } 486 } else { 487 for (ii=0; ii<bs; ii++,value+=bs) { 488 for (jj=0; jj<bs; jj++) { 489 bap[jj] = value[jj]; 490 } 491 bap += bs; 492 } 493 } 494 } 495 goto noinsert2; 496 } 497 } 498 if (nonew == 1) goto noinsert2; 499 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%D, %D) in the matrix", orow, ocol); 500 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 501 N = nrow++ - 1; high++; 502 /* shift up all the later entries in this row */ 503 ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr); 504 ierr = 
PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr); 505 rp[i] = col; 506 bap = ap + bs2*i; 507 if (roworiented) { 508 for (ii=0; ii<bs; ii++) { 509 for (jj=ii; jj<bs2; jj+=bs) { 510 bap[jj] = *value++; 511 } 512 } 513 } else { 514 for (ii=0; ii<bs; ii++) { 515 for (jj=0; jj<bs; jj++) { 516 *bap++ = *value++; 517 } 518 } 519 } 520 noinsert2:; 521 ailen[row] = nrow; 522 PetscFunctionReturn(0); 523 } 524 525 /* 526 This routine could be optimized by removing the need for the block copy below and passing stride information 527 to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ() 528 */ 529 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 530 { 531 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 532 const MatScalar *value; 533 MatScalar *barray =baij->barray; 534 PetscBool roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular; 535 PetscErrorCode ierr; 536 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 537 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 538 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 539 540 PetscFunctionBegin; 541 if (!barray) { 542 ierr = PetscMalloc1(bs2,&barray);CHKERRQ(ierr); 543 baij->barray = barray; 544 } 545 546 if (roworiented) { 547 stepval = (n-1)*bs; 548 } else { 549 stepval = (m-1)*bs; 550 } 551 for (i=0; i<m; i++) { 552 if (im[i] < 0) continue; 553 #if defined(PETSC_USE_DEBUG) 554 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %D max %D",im[i],baij->Mbs-1); 555 #endif 556 if (im[i] >= rstart && im[i] < rend) { 557 row = im[i] - rstart; 558 for (j=0; j<n; j++) { 559 if (im[i] > in[j]) { 560 if (ignore_ltriangular) continue; /* ignore lower triangular blocks */ 561 else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. 
Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 562 } 563 /* If NumCol = 1 then a copy is not required */ 564 if ((roworiented) && (n == 1)) { 565 barray = (MatScalar*) v + i*bs2; 566 } else if ((!roworiented) && (m == 1)) { 567 barray = (MatScalar*) v + j*bs2; 568 } else { /* Here a copy is required */ 569 if (roworiented) { 570 value = v + i*(stepval+bs)*bs + j*bs; 571 } else { 572 value = v + j*(stepval+bs)*bs + i*bs; 573 } 574 for (ii=0; ii<bs; ii++,value+=stepval) { 575 for (jj=0; jj<bs; jj++) { 576 *barray++ = *value++; 577 } 578 } 579 barray -=bs2; 580 } 581 582 if (in[j] >= cstart && in[j] < cend) { 583 col = in[j] - cstart; 584 ierr = MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr); 585 } else if (in[j] < 0) continue; 586 #if defined(PETSC_USE_DEBUG) 587 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %D max %D",in[j],baij->Nbs-1); 588 #endif 589 else { 590 if (mat->was_assembled) { 591 if (!baij->colmap) { 592 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 593 } 594 595 #if defined(PETSC_USE_DEBUG) 596 #if defined(PETSC_USE_CTABLE) 597 { PetscInt data; 598 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 599 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 600 } 601 #else 602 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 603 #endif 604 #endif 605 #if defined(PETSC_USE_CTABLE) 606 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 607 col = (col - 1)/bs; 608 #else 609 col = (baij->colmap[in[j]] - 1)/bs; 610 #endif 611 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 612 ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 613 col = in[j]; 614 } 615 } else col = in[j]; 616 ierr = 
MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr); 617 } 618 } 619 } else { 620 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 621 if (!baij->donotstash) { 622 if (roworiented) { 623 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 624 } else { 625 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 626 } 627 } 628 } 629 } 630 PetscFunctionReturn(0); 631 } 632 633 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 634 { 635 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 636 PetscErrorCode ierr; 637 PetscInt bs = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend; 638 PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data; 639 640 PetscFunctionBegin; 641 for (i=0; i<m; i++) { 642 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */ 643 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 644 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 645 row = idxm[i] - bsrstart; 646 for (j=0; j<n; j++) { 647 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */ 648 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 649 if (idxn[j] >= bscstart && idxn[j] < bscend) { 650 col = idxn[j] - bscstart; 651 ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 652 } else { 653 if (!baij->colmap) { 654 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 655 } 656 #if defined(PETSC_USE_CTABLE) 657 ierr = 
PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 658 data--; 659 #else 660 data = baij->colmap[idxn[j]/bs]-1; 661 #endif 662 if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 663 else { 664 col = data + idxn[j]%bs; 665 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 666 } 667 } 668 } 669 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 670 } 671 PetscFunctionReturn(0); 672 } 673 674 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm) 675 { 676 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 677 PetscErrorCode ierr; 678 PetscReal sum[2],*lnorm2; 679 680 PetscFunctionBegin; 681 if (baij->size == 1) { 682 ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr); 683 } else { 684 if (type == NORM_FROBENIUS) { 685 ierr = PetscMalloc1(2,&lnorm2);CHKERRQ(ierr); 686 ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr); 687 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++; /* squar power of norm(A) */ 688 ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr); 689 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--; /* squar power of norm(B) */ 690 ierr = MPIU_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 691 *norm = PetscSqrtReal(sum[0] + 2*sum[1]); 692 ierr = PetscFree(lnorm2);CHKERRQ(ierr); 693 } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */ 694 Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data; 695 Mat_SeqBAIJ *bmat=(Mat_SeqBAIJ*)baij->B->data; 696 PetscReal *rsum,*rsum2,vabs; 697 PetscInt *jj,*garray=baij->garray,rstart=baij->rstartbs,nz; 698 PetscInt brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs; 699 MatScalar *v; 700 701 ierr = PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2);CHKERRQ(ierr); 702 ierr = PetscArrayzero(rsum,mat->cmap->N);CHKERRQ(ierr); 703 /* Amat */ 704 v = amat->a; jj = amat->j; 705 for (brow=0; brow<mbs; brow++) { 706 grow = bs*(rstart + brow); 707 nz = amat->i[brow+1] - 
amat->i[brow]; 708 for (bcol=0; bcol<nz; bcol++) { 709 gcol = bs*(rstart + *jj); jj++; 710 for (col=0; col<bs; col++) { 711 for (row=0; row<bs; row++) { 712 vabs = PetscAbsScalar(*v); v++; 713 rsum[gcol+col] += vabs; 714 /* non-diagonal block */ 715 if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs; 716 } 717 } 718 } 719 ierr = PetscLogFlops(nz*bs*bs);CHKERRQ(ierr); 720 } 721 /* Bmat */ 722 v = bmat->a; jj = bmat->j; 723 for (brow=0; brow<mbs; brow++) { 724 grow = bs*(rstart + brow); 725 nz = bmat->i[brow+1] - bmat->i[brow]; 726 for (bcol=0; bcol<nz; bcol++) { 727 gcol = bs*garray[*jj]; jj++; 728 for (col=0; col<bs; col++) { 729 for (row=0; row<bs; row++) { 730 vabs = PetscAbsScalar(*v); v++; 731 rsum[gcol+col] += vabs; 732 rsum[grow+row] += vabs; 733 } 734 } 735 } 736 ierr = PetscLogFlops(nz*bs*bs);CHKERRQ(ierr); 737 } 738 ierr = MPIU_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 739 *norm = 0.0; 740 for (col=0; col<mat->cmap->N; col++) { 741 if (rsum2[col] > *norm) *norm = rsum2[col]; 742 } 743 ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr); 744 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet"); 745 } 746 PetscFunctionReturn(0); 747 } 748 749 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode) 750 { 751 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 752 PetscErrorCode ierr; 753 PetscInt nstash,reallocs; 754 755 PetscFunctionBegin; 756 if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 757 758 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 759 ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr); 760 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 761 ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 762 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 763 ierr = 
PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 764 PetscFunctionReturn(0); 765 } 766 767 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode) 768 { 769 Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data; 770 Mat_SeqSBAIJ *a =(Mat_SeqSBAIJ*)baij->A->data; 771 PetscErrorCode ierr; 772 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 773 PetscInt *row,*col; 774 PetscBool other_disassembled; 775 PetscMPIInt n; 776 PetscBool r1,r2,r3; 777 MatScalar *val; 778 779 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 780 PetscFunctionBegin; 781 if (!baij->donotstash && !mat->nooffprocentries) { 782 while (1) { 783 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 784 if (!flg) break; 785 786 for (i=0; i<n;) { 787 /* Now identify the consecutive vals belonging to the same row */ 788 for (j=i,rstart=row[j]; j<n; j++) { 789 if (row[j] != rstart) break; 790 } 791 if (j < n) ncols = j-i; 792 else ncols = n-i; 793 /* Now assemble all these values with a single function call */ 794 ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 795 i = j; 796 } 797 } 798 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 799 /* Now process the block-stash. 
Since the values are stashed column-oriented, 800 set the roworiented flag to column oriented, and after MatSetValues() 801 restore the original flags */ 802 r1 = baij->roworiented; 803 r2 = a->roworiented; 804 r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented; 805 806 baij->roworiented = PETSC_FALSE; 807 a->roworiented = PETSC_FALSE; 808 809 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworinted */ 810 while (1) { 811 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 812 if (!flg) break; 813 814 for (i=0; i<n;) { 815 /* Now identify the consecutive vals belonging to the same row */ 816 for (j=i,rstart=row[j]; j<n; j++) { 817 if (row[j] != rstart) break; 818 } 819 if (j < n) ncols = j-i; 820 else ncols = n-i; 821 ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode);CHKERRQ(ierr); 822 i = j; 823 } 824 } 825 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 826 827 baij->roworiented = r1; 828 a->roworiented = r2; 829 830 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworinted */ 831 } 832 833 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 834 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 835 836 /* determine if any processor has disassembled, if so we must 837 also disassemble ourselfs, in order that we may reassemble. 
  */
  /*
     if the nonzero structure of the off-diagonal submatrix B cannot change
     then we know that no processor disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    /* MPI_PROD of the logicals: true only if every rank is still assembled */
    ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);

  baij->rowvalues = 0;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
#include <petscdraw.h>

/*
   Viewer back-end shared by the ASCII, draw, and socket viewers.
   INFO-style ASCII formats print per-rank statistics and return early; all
   other formats gather the whole matrix onto rank 0 and view it sequentially.
   Collective: every rank must call (graphics waits are synchronized).
*/
static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscInt          bs   = mat->rmap->bs;
  PetscMPIInt       rank = baij->rank;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %g\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(double)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    Mat_SeqSBAIJ *Aloc;
    Mat_SeqBAIJ  *Bloc;
    PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar    *a;
    const char   *matname;

    /* Should this be the same type as mat? */
    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
    ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* copy over the A (diagonal, SBAIJ) part, block-row by block-row */
    Aloc = (Mat_SeqSBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i); /* first global row of this block row */
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          /* each MatSetValues call inserts one column of a bs x bs block; a advances through the block values */
          ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++;
          a += bs;
        }
      }
    }
    /* copy over the B (off-diagonal, BAIJ) part; garray maps local block columns to global */
    Bloc = (Mat_SeqBAIJ*)baij->B->data;
    ai   = Bloc->i; aj = Bloc->j; a = Bloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++;
          a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,matname);CHKERRQ(ierr);
      ierr = MatView_SeqSBAIJ(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* Used for both MPIBAIJ and MPISBAIJ matrices */
#define MatView_MPISBAIJ_Binary MatView_MPIBAIJ_Binary

/* Dispatch MatView to the viewer-type-specific implementation; unsupported viewer types are silently ignored */
PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii || isdraw || issocket) {
    ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPISBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* Destroy all data owned by an MPISBAIJ matrix and unregister its composed methods */
PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr =
MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
  /* slvec0/slvec1 and sMvctx implement the single-communication symmetric mat-vec */
  ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr);
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree(baij->hd);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->diag);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr);
#if defined(PETSC_USE_REAL_MAT_SINGLE)
  ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
  ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  /* remove composed methods so a later MatSetType() starts clean */
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_elemental_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpiaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpibaij_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   yy = A*xx for a Hermitian MPISBAIJ matrix using a single forward scatter:
   slvec0 = [x ; B^H x], scattered with ADD_VALUES into slvec1 = [A_d x ; x_ghost],
   then yy = B*slvec1b + slvec1a.
*/
PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscInt          mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar       *from;
  const PetscScalar *x;

  PetscFunctionBegin;
  /* diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);

  /* subdiagonal part: conjugate-transpose multiply by the off-diagonal block */
  ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);

  ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* yy = A*xx for a symmetric MPISBAIJ matrix; same single-scatter scheme as the Hermitian variant but with a plain transpose multiply */
PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscInt          mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar       *from;
  const PetscScalar *x;

  PetscFunctionBegin;
  /* diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);

  /*
subdiagonal part */ 1109 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 1110 1111 /* copy x into the vec slvec0 */ 1112 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 1113 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1114 1115 ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr); 1116 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 1117 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1118 1119 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1120 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1121 /* supperdiagonal part */ 1122 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 1123 PetscFunctionReturn(0); 1124 } 1125 1126 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy) 1127 { 1128 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1129 PetscErrorCode ierr; 1130 PetscInt nt; 1131 1132 PetscFunctionBegin; 1133 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1134 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1135 1136 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1137 if (nt != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 1138 1139 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1140 /* do diagonal part */ 1141 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1142 /* do supperdiagonal part */ 1143 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1144 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1145 /* do subdiagonal part */ 1146 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1147 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1148 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1149 PetscFunctionReturn(0); 1150 } 1151 1152 
/*
   zz = yy + A*xx for a Hermitian MPISBAIJ matrix; single-scatter scheme,
   with yy folded into the diagonal multiply-add.
*/
PetscErrorCode MatMultAdd_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscInt          mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar       *from,zero=0.0;
  const PetscScalar *x;

  PetscFunctionBegin;
  /* diagonal part */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
  ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* zz = yy + A*xx for a symmetric MPISBAIJ matrix; plain-transpose analogue of the Hermitian variant above */
PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscInt          mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar       *from,zero=0.0;
  const PetscScalar *x;

  PetscFunctionBegin;
  /* diagonal part */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
  ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* zz = yy + A*xx using the two-communication scheme (forward + reverse scatter) */
PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* do diagonal part */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* do superdiagonal part */
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);

  /* do subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Scale both the diagonal (A) and off-diagonal (B) local blocks by aa */
PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   Get one locally owned global row: merges the row of the diagonal (A) and
   off-diagonal (B) blocks into global column order using the garray mapping.
   Only one row may be active at a time (see getrowactive guard); values and
   indices are returned in the shared rowvalues/rowindices scratch arrays.
*/
PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
    Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt     max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
      if (max < tmp) max = tmp;
    }
    ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
  }

  if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
  lrow = row - brstart; /* local row index */

  /* pass NULL sub-requests when the caller did not ask for values/indices */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v) {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      /* imark = number of B entries whose global block column precedes this rank's range */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          /* imark was not computed in the values pass; compute it here */
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
      }
    } else {
      if (idx) *idx = 0;
      if (v) *v = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Release the row obtained with MatGetRow_MPISBAIJ (just clears the active flag) */
PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;

  PetscFunctionBegin;
  if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/* Allow MatGetRow on the upper-triangular storage of the diagonal SBAIJ block */
PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;

  PetscFunctionBegin;
  aA->getrow_utriangular = PETSC_TRUE;
  PetscFunctionReturn(0);
}
PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;

  PetscFunctionBegin;
  aA->getrow_utriangular = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/* Keep only the real part of every stored entry (both local blocks) */
PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  ierr = MatRealPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Keep only the imaginary part of every stored entry (both local blocks) */
PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Check if isrow is a subset of iscol_local, called by MatCreateSubMatrix_MPISBAIJ()
   Input: isrow - distributed(parallel),
          iscol_local - locally owned (seq)
*/
PetscErrorCode ISEqual_private(IS isrow,IS iscol_local,PetscBool *flg)
{
  PetscErrorCode ierr;
  PetscInt       sz1,sz2,*a1,*a2,i,j,k,nmatch;
  const PetscInt *ptr1,*ptr2;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(isrow,&sz1);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol_local,&sz2);CHKERRQ(ierr);
  if (sz1 > sz2) {
    *flg = PETSC_FALSE;
    PetscFunctionReturn(0);
  }

  ierr = ISGetIndices(isrow,&ptr1);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol_local,&ptr2);CHKERRQ(ierr);

  /* sort copies of both index sets so the subset test below is a single merge-style sweep */
  ierr = PetscMalloc1(sz1,&a1);CHKERRQ(ierr);
  ierr = PetscMalloc1(sz2,&a2);CHKERRQ(ierr);
  ierr = PetscArraycpy(a1,ptr1,sz1);CHKERRQ(ierr);
  ierr = PetscArraycpy(a2,ptr2,sz2);CHKERRQ(ierr);
  ierr = PetscSortInt(sz1,a1);CHKERRQ(ierr);
  ierr = PetscSortInt(sz2,a2);CHKERRQ(ierr);

  /* count how many entries of a1 appear in a2 (k avoids rescanning matched prefix) */
  nmatch=0;
  k = 0;
  for (i=0; i<sz1; i++){
    for (j=k; j<sz2; j++){
      if (a1[i] == a2[j]) {
        k = j; nmatch++;
        break;
      }
    }
  }
  ierr = ISRestoreIndices(isrow,&ptr1);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol_local,&ptr2);CHKERRQ(ierr);
  ierr = PetscFree(a1);CHKERRQ(ierr);
  ierr = PetscFree(a2);CHKERRQ(ierr);
  if (nmatch < sz1) {
    *flg = PETSC_FALSE;
  } else {
    *flg = PETSC_TRUE;
  }
  PetscFunctionReturn(0);
}

/*
   Extract a parallel submatrix; for the symmetric format isrow must be
   contained in iscol. The gathered column IS is cached on the submatrix
   under "ISAllGather" so MAT_REUSE_MATRIX calls can find it again.
*/
PetscErrorCode MatCreateSubMatrix_MPISBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  IS             iscol_local;
  PetscInt       csize;
  PetscBool      isequal;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
  if (call == MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
    if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
  } else {
    ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
    ierr = ISEqual_private(isrow,iscol_local,&isequal);CHKERRQ(ierr);
    if (!isequal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isrow");
  }

  /* now call MatCreateSubMatrix_MPIBAIJ() */
  ierr = MatCreateSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
    ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* Zero all stored entries in both local blocks, keeping the nonzero pattern */
PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   Gather matrix statistics: sums the A and B block infos locally, then
   reduces across the communicator for MAT_GLOBAL_MAX / MAT_GLOBAL_SUM.
*/
PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
  Mat            A = a->A,B = a->B;
  PetscErrorCode ierr;
  PetscLogDouble isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = (PetscReal)matin->rmap->bs;

  ierr =
MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory; isend[4] = info->mallocs;

  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory; isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

/*
   Set a matrix option; most options are forwarded to the local A (SBAIJ)
   and B (BAIJ) blocks. MAT_HERMITIAN / MAT_SYMMETRIC swap the mat-vec
   function pointers in complex builds.
*/
PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPISBAIJ   *a  = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_SUBMAT_SINGLEIS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A,1);
    a->roworiented = flg;

    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
  case MAT_SORTED_FULL:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    a->ht_flag = flg;
    break;
  case MAT_HERMITIAN:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
#if defined(PETSC_USE_COMPLEX)
    if (flg) { /* need different mat-vec ops */
      A->ops->mult             = MatMult_MPISBAIJ_Hermitian;
      A->ops->multadd          = MatMultAdd_MPISBAIJ_Hermitian;
      A->ops->multtranspose    = NULL;
      A->ops->multtransposeadd = NULL;
      A->symmetric             = PETSC_FALSE;
    }
#endif
    break;
  case MAT_SPD:
  case MAT_SYMMETRIC:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
#if defined(PETSC_USE_COMPLEX)
    if (flg) { /* restore to use default mat-vec ops; transpose ops equal the plain ops since A is symmetric */
      A->ops->mult             = MatMult_MPISBAIJ;
      A->ops->multadd          = MatMultAdd_MPISBAIJ;
      A->ops->multtranspose    = MatMult_MPISBAIJ;
      A->ops->multtransposeadd = MatMultAdd_MPISBAIJ;
    }
#endif
    break;
  case MAT_STRUCTURALLY_SYMMETRIC:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_SYMMETRY_ETERNAL:
    if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_LOWER_TRIANGULAR:
    aA->ignore_ltriangular = flg;
    break;
  case MAT_ERROR_LOWER_TRIANGULAR:
    /* NOTE(review): sets ignore_ltriangular (not an error flag) — matches upstream, but verify intent */
    aA->ignore_ltriangular = flg;
    break;
  case MAT_GETROW_UPPERTRIANGULAR:
    aA->getrow_utriangular = flg;
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}

/* Transpose of a symmetric matrix is itself: duplicate or copy accordingly */
PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
  } else if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatCopy(A,*B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*
   Symmetric diagonal scaling: requires ll == rr (element-wise) so the
   result stays symmetric; scales A with (ll,rr) and B from both sides.
*/
PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  Mat            a = baij->A, b=baij->B;
  PetscErrorCode ierr;
  PetscInt       nv,m,n;
  PetscBool      flg;

  PetscFunctionBegin;
  if (ll != rr) {
    ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
    if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
  }
  if (!ll) PetscFunctionReturn(0);

  ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
  if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n);

  ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
  if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");

  ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* left diagonalscale the off-diagonal part */
  ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);

  /* scale the diagonal part */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  /* right diagonalscale the off-diagonal part */
  ierr =
VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Mark the local diagonal block as unfactored */
PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*);

/* Collective equality test: local A and B blocks compared, then AND-reduced across ranks */
PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool *flag)
{
  Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
  Mat            a,b,c,d;
  PetscBool      flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   Copy A into B (B must be a SBAIJ-format matrix). Uses the fast
   block-by-block copy only when the patterns and implementations match.
*/
PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
{
  PetscErrorCode ierr;
  PetscBool      isbaij;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompareAny((PetscObject)B,&isbaij,MATSEQSBAIJ,MATMPISBAIJ,"");CHKERRQ(ierr);
  if (!isbaij) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"Not for matrix type %s",((PetscObject)B)->type_name);
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
    ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
    ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
  } else {
    Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
    Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)B->data;

    ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
    ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
  }
  ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Default setup: preallocate with default block size and zero guesses */
PetscErrorCode MatSetUp_MPISBAIJ(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   Y = a*X + Y. SAME_NONZERO_PATTERN uses direct BLAS axpy on the value
   arrays; SUBSET uses the basic row-wise path; otherwise a new matrix with
   the union pattern is built and header-replaced into Y.
*/
PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data;
  PetscBLASInt   bnz,one=1;
  Mat_SeqSBAIJ   *xa,*ya;
  Mat_SeqBAIJ    *xb,*yb;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    xa   = (Mat_SeqSBAIJ*)xx->A->data;
    ya   = (Mat_SeqSBAIJ*)yy->A->data;
    ierr = PetscBLASIntCast(xa->nz,&bnz);CHKERRQ(ierr);
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one));
    xb   = (Mat_SeqBAIJ*)xx->B->data;
    yb   = (Mat_SeqBAIJ*)yy->B->data;
    ierr = PetscBLASIntCast(xb->nz,&bnz);CHKERRQ(ierr);
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one));
    ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
    ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
    ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
  } else {
    Mat      B;
    PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs;
    if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
    ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
    ierr = MatGetRowUpperTriangular(Y);CHKERRQ(ierr);
    ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
    ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
    ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
    ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
    ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
    ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
    ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_SeqSBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
    ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
    ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
    ierr = PetscFree(nnz_d);CHKERRQ(ierr);
    ierr = PetscFree(nnz_o);CHKERRQ(ierr);
    ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
    ierr = MatRestoreRowUpperTriangular(Y);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*
   Extract sequential submatrices; a submatrix with irow[i] != icol[i] is not
   symmetric, so its SBAIJ ops are zeroed to prevent misuse.
*/
PetscErrorCode MatCreateSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
{
  PetscErrorCode ierr;
  PetscInt       i;
  PetscBool      flg;

  PetscFunctionBegin;
  ierr = MatCreateSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr); /* B[] are sbaij matrices */
  for (i=0; i<n; i++) {
    ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
    if (!flg) {
      ierr = MatSeqSBAIJZeroOps_Private(*B[i]);CHKERRQ(ierr);
    }
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatShift_MPISBAIJ(Mat Y,PetscScalar
a) 1790 { 1791 PetscErrorCode ierr; 1792 Mat_MPISBAIJ *maij = (Mat_MPISBAIJ*)Y->data; 1793 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ*)maij->A->data; 1794 1795 PetscFunctionBegin; 1796 if (!Y->preallocated) { 1797 ierr = MatMPISBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);CHKERRQ(ierr); 1798 } else if (!aij->nz) { 1799 PetscInt nonew = aij->nonew; 1800 ierr = MatSeqSBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL);CHKERRQ(ierr); 1801 aij->nonew = nonew; 1802 } 1803 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 1804 PetscFunctionReturn(0); 1805 } 1806 1807 PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A,PetscBool *missing,PetscInt *d) 1808 { 1809 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1810 PetscErrorCode ierr; 1811 1812 PetscFunctionBegin; 1813 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 1814 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 1815 if (d) { 1816 PetscInt rstart; 1817 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 1818 *d += rstart/A->rmap->bs; 1819 1820 } 1821 PetscFunctionReturn(0); 1822 } 1823 1824 PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a) 1825 { 1826 PetscFunctionBegin; 1827 *a = ((Mat_MPISBAIJ*)A->data)->A; 1828 PetscFunctionReturn(0); 1829 } 1830 1831 /* -------------------------------------------------------------------*/ 1832 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ, 1833 MatGetRow_MPISBAIJ, 1834 MatRestoreRow_MPISBAIJ, 1835 MatMult_MPISBAIJ, 1836 /* 4*/ MatMultAdd_MPISBAIJ, 1837 MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */ 1838 MatMultAdd_MPISBAIJ, 1839 0, 1840 0, 1841 0, 1842 /* 10*/ 0, 1843 0, 1844 0, 1845 MatSOR_MPISBAIJ, 1846 MatTranspose_MPISBAIJ, 1847 /* 15*/ MatGetInfo_MPISBAIJ, 1848 MatEqual_MPISBAIJ, 1849 MatGetDiagonal_MPISBAIJ, 1850 MatDiagonalScale_MPISBAIJ, 1851 MatNorm_MPISBAIJ, 1852 /* 20*/ MatAssemblyBegin_MPISBAIJ, 1853 MatAssemblyEnd_MPISBAIJ, 1854 MatSetOption_MPISBAIJ, 1855 
MatZeroEntries_MPISBAIJ, 1856 /* 24*/ 0, 1857 0, 1858 0, 1859 0, 1860 0, 1861 /* 29*/ MatSetUp_MPISBAIJ, 1862 0, 1863 0, 1864 MatGetDiagonalBlock_MPISBAIJ, 1865 0, 1866 /* 34*/ MatDuplicate_MPISBAIJ, 1867 0, 1868 0, 1869 0, 1870 0, 1871 /* 39*/ MatAXPY_MPISBAIJ, 1872 MatCreateSubMatrices_MPISBAIJ, 1873 MatIncreaseOverlap_MPISBAIJ, 1874 MatGetValues_MPISBAIJ, 1875 MatCopy_MPISBAIJ, 1876 /* 44*/ 0, 1877 MatScale_MPISBAIJ, 1878 MatShift_MPISBAIJ, 1879 0, 1880 0, 1881 /* 49*/ 0, 1882 0, 1883 0, 1884 0, 1885 0, 1886 /* 54*/ 0, 1887 0, 1888 MatSetUnfactored_MPISBAIJ, 1889 0, 1890 MatSetValuesBlocked_MPISBAIJ, 1891 /* 59*/ MatCreateSubMatrix_MPISBAIJ, 1892 0, 1893 0, 1894 0, 1895 0, 1896 /* 64*/ 0, 1897 0, 1898 0, 1899 0, 1900 0, 1901 /* 69*/ MatGetRowMaxAbs_MPISBAIJ, 1902 0, 1903 MatConvert_MPISBAIJ_Basic, 1904 0, 1905 0, 1906 /* 74*/ 0, 1907 0, 1908 0, 1909 0, 1910 0, 1911 /* 79*/ 0, 1912 0, 1913 0, 1914 0, 1915 MatLoad_MPISBAIJ, 1916 /* 84*/ 0, 1917 0, 1918 0, 1919 0, 1920 0, 1921 /* 89*/ 0, 1922 0, 1923 0, 1924 0, 1925 0, 1926 /* 94*/ 0, 1927 0, 1928 0, 1929 0, 1930 0, 1931 /* 99*/ 0, 1932 0, 1933 0, 1934 0, 1935 0, 1936 /*104*/ 0, 1937 MatRealPart_MPISBAIJ, 1938 MatImaginaryPart_MPISBAIJ, 1939 MatGetRowUpperTriangular_MPISBAIJ, 1940 MatRestoreRowUpperTriangular_MPISBAIJ, 1941 /*109*/ 0, 1942 0, 1943 0, 1944 0, 1945 MatMissingDiagonal_MPISBAIJ, 1946 /*114*/ 0, 1947 0, 1948 0, 1949 0, 1950 0, 1951 /*119*/ 0, 1952 0, 1953 0, 1954 0, 1955 0, 1956 /*124*/ 0, 1957 0, 1958 0, 1959 0, 1960 0, 1961 /*129*/ 0, 1962 0, 1963 0, 1964 0, 1965 0, 1966 /*134*/ 0, 1967 0, 1968 0, 1969 0, 1970 0, 1971 /*139*/ MatSetBlockSizes_Default, 1972 0, 1973 0, 1974 0, 1975 0, 1976 /*144*/MatCreateMPIMatConcatenateSeqMat_MPISBAIJ 1977 }; 1978 1979 PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz) 1980 { 1981 Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)B->data; 1982 PetscErrorCode ierr; 1983 PetscInt i,mbs,Mbs; 1984 
PetscMPIInt size; 1985 1986 PetscFunctionBegin; 1987 ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 1988 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 1989 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 1990 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 1991 if (B->rmap->N > B->cmap->N) SETERRQ2(PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"MPISBAIJ matrix cannot have more rows %D than columns %D",B->rmap->N,B->cmap->N); 1992 if (B->rmap->n > B->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"MPISBAIJ matrix cannot have more local rows %D than columns %D",B->rmap->n,B->cmap->n); 1993 1994 mbs = B->rmap->n/bs; 1995 Mbs = B->rmap->N/bs; 1996 if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->N,bs); 1997 1998 B->rmap->bs = bs; 1999 b->bs2 = bs*bs; 2000 b->mbs = mbs; 2001 b->Mbs = Mbs; 2002 b->nbs = B->cmap->n/bs; 2003 b->Nbs = B->cmap->N/bs; 2004 2005 for (i=0; i<=b->size; i++) { 2006 b->rangebs[i] = B->rmap->range[i]/bs; 2007 } 2008 b->rstartbs = B->rmap->rstart/bs; 2009 b->rendbs = B->rmap->rend/bs; 2010 2011 b->cstartbs = B->cmap->rstart/bs; 2012 b->cendbs = B->cmap->rend/bs; 2013 2014 #if defined(PETSC_USE_CTABLE) 2015 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2016 #else 2017 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2018 #endif 2019 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2020 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2021 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2022 ierr = VecDestroy(&b->slvec0);CHKERRQ(ierr); 2023 ierr = VecDestroy(&b->slvec0b);CHKERRQ(ierr); 2024 ierr = VecDestroy(&b->slvec1);CHKERRQ(ierr); 2025 ierr = VecDestroy(&b->slvec1a);CHKERRQ(ierr); 2026 ierr = VecDestroy(&b->slvec1b);CHKERRQ(ierr); 2027 ierr = VecScatterDestroy(&b->sMvctx);CHKERRQ(ierr); 2028 2029 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2030 ierr = 
MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 2031 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2032 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2033 ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr); 2034 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 2035 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2036 2037 if (!B->preallocated) { 2038 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2039 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2040 ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr); 2041 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2042 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr); 2043 } 2044 2045 ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2046 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 2047 2048 B->preallocated = PETSC_TRUE; 2049 B->was_assembled = PETSC_FALSE; 2050 B->assembled = PETSC_FALSE; 2051 PetscFunctionReturn(0); 2052 } 2053 2054 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2055 { 2056 PetscInt m,rstart,cend; 2057 PetscInt i,j,d,nz,bd, nz_max=0,*d_nnz=0,*o_nnz=0; 2058 const PetscInt *JJ =0; 2059 PetscScalar *values=0; 2060 PetscBool roworiented = ((Mat_MPISBAIJ*)B->data)->roworiented; 2061 PetscErrorCode ierr; 2062 PetscBool nooffprocentries; 2063 2064 PetscFunctionBegin; 2065 if (bs < 1) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 2066 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2067 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2068 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2069 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2070 ierr = 
PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2071 m = B->rmap->n/bs; 2072 rstart = B->rmap->rstart/bs; 2073 cend = B->cmap->rend/bs; 2074 2075 if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 2076 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 2077 for (i=0; i<m; i++) { 2078 nz = ii[i+1] - ii[i]; 2079 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 2080 /* count the ones on the diagonal and above, split into diagonal and off diagonal portions. */ 2081 JJ = jj + ii[i]; 2082 bd = 0; 2083 for (j=0; j<nz; j++) { 2084 if (*JJ >= i + rstart) break; 2085 JJ++; 2086 bd++; 2087 } 2088 d = 0; 2089 for (; j<nz; j++) { 2090 if (*JJ++ >= cend) break; 2091 d++; 2092 } 2093 d_nnz[i] = d; 2094 o_nnz[i] = nz - d - bd; 2095 nz = nz - bd; 2096 nz_max = PetscMax(nz_max,nz); 2097 } 2098 ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2099 ierr = MatSetOption(B,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 2100 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 2101 2102 values = (PetscScalar*)V; 2103 if (!values) { 2104 ierr = PetscCalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr); 2105 } 2106 for (i=0; i<m; i++) { 2107 PetscInt row = i + rstart; 2108 PetscInt ncols = ii[i+1] - ii[i]; 2109 const PetscInt *icols = jj + ii[i]; 2110 if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */ 2111 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2112 ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2113 } else { /* block ordering does not match so we can only insert one block at a time. */ 2114 PetscInt j; 2115 for (j=0; j<ncols; j++) { 2116 const PetscScalar *svals = values + (V ? 
(bs*bs*(ii[i]+j)) : 0); 2117 ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr); 2118 } 2119 } 2120 } 2121 2122 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2123 nooffprocentries = B->nooffprocentries; 2124 B->nooffprocentries = PETSC_TRUE; 2125 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2126 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2127 B->nooffprocentries = nooffprocentries; 2128 2129 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2130 PetscFunctionReturn(0); 2131 } 2132 2133 /*MC 2134 MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices, 2135 based on block compressed sparse row format. Only the upper triangular portion of the "diagonal" portion of 2136 the matrix is stored. 2137 2138 For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you 2139 can call MatSetOption(Mat, MAT_HERMITIAN); 2140 2141 Options Database Keys: 2142 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions() 2143 2144 Notes: 2145 The number of rows in the matrix must be less than or equal to the number of columns. Similarly the number of rows in the 2146 diagonal portion of the matrix of each process has to less than or equal the number of columns. 

   Level: beginner

.seealso: MatCreateBAIJ(), MATSEQSBAIJ, MatType
M*/

/*
   MatCreate_MPISBAIJ - constructor registered for MATMPISBAIJ: installs the
   operation table, zero-initializes the Mat_MPISBAIJ context, composes the
   type-specific methods, and marks the matrix symmetric.
*/
PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B)
{
  Mat_MPISBAIJ   *b;
  PetscErrorCode ierr;
  PetscBool      flg = PETSC_FALSE;

  PetscFunctionBegin;
  ierr = PetscNewLog(B,&b);CHKERRQ(ierr);
  B->data = (void*)b;
  ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  B->ops->destroy = MatDestroy_MPISBAIJ;
  B->ops->view    = MatView_MPISBAIJ;
  B->assembled    = PETSC_FALSE;
  B->insertmode   = NOT_SET_VALUES;

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  ierr = PetscMalloc1(b->size+2,&b->rangebs);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used in block assembly */
  b->barray = 0;

  /* stuff used for matrix vector multiply */
  b->lvec    = 0;
  b->Mvctx   = 0;
  b->slvec0  = 0;
  b->slvec0b = 0;
  b->slvec1  = 0;
  b->slvec1a = 0;
  b->slvec1b = 0;
  b->sMvctx  = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = 0;
  b->hd           = 0;
  b->ht_size      = 0;
  b->ht_flag      = PETSC_FALSE;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
  b->ijonly = PETSC_FALSE;

  b->in_loc = 0;
  b->v_loc  = 0;
  b->n_loc  = 0;

  /* compose the type-specific methods looked up by name via PetscTryMethod()/PetscUseMethod() */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_elemental_C",MatConvert_MPISBAIJ_Elemental);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpiaij_C",MatConvert_MPISBAIJ_Basic);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpibaij_C",MatConvert_MPISBAIJ_Basic);CHKERRQ(ierr);

  /* SBAIJ storage is symmetric by construction; with complex scalars it is NOT
     Hermitian unless the user explicitly sets MAT_HERMITIAN */
  B->symmetric                  = PETSC_TRUE;
  B->structurally_symmetric     = PETSC_TRUE;
  B->symmetric_set              = PETSC_TRUE;
  B->structurally_symmetric_set = PETSC_TRUE;
  B->symmetric_eternal          = PETSC_TRUE;
#if defined(PETSC_USE_COMPLEX)
  B->hermitian     = PETSC_FALSE;
  B->hermitian_set = PETSC_FALSE;
#else
  B->hermitian     = PETSC_TRUE;
  B->hermitian_set = PETSC_TRUE;
#endif

  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr);
  ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",flg,&flg,NULL);CHKERRQ(ierr);
  if (flg) {
    PetscReal fact = 1.39;
    ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
    ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
    if (fact <= 1.0) fact = 1.39; /* factors <= 1 are invalid; fall back to the default */
    ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
    ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
  }
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*MC
   MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.

   This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
   and MATMPISBAIJ otherwise.

   Options Database Keys:
. -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPISBAIJ, MATSEQSBAIJ, MATMPISBAIJ
M*/

/*@C
   MatMPISBAIJSetPreallocation - For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on Mat

   Input Parameters:
+  B - the matrix
.  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
          blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
.  d_nz  - number of block nonzeros per block row in diagonal portion of local
           submatrix  (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the upper triangular and diagonal part of the diagonal portion of the local matrix
           (possibly different for each block row) or NULL.  If you plan to factor the matrix you must leave room
           for the diagonal entry and set a value even if it is zero.
.  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
           submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix that is right of the diagonal
           (possibly different for each block row) or NULL.


   Options Database Keys:
+  -mat_no_unroll - uses code that does not unroll the loops in the
                    block calculations (much slower)
-  -mat_block_size - size of the blocks to use

   Notes:

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   If the *_nnz parameter is given then the *_nz parameter is ignored

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both). Set
   d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
   memory allocation. Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   You can call MatGetInfo() to get information on how effective the preallocation was;
   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          --------------------------
   row 3  |. . . d d d o o o o o o
   row 4  |. . . d d d o o o o o o
   row 5  |. . . d d d o o o o o o
          --------------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix. Note that the d matrix is stored in
   MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.

   Now d_nz should indicate the number of block nonzeros per row in the upper triangular
   plus the diagonal part of the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix

   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz. For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership()
@*/
PetscErrorCode MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  PetscValidLogicalCollectiveInt(B,bs,2);
  /* dispatches to MatMPISBAIJSetPreallocation_MPISBAIJ when B is MATMPISBAIJ;
     silently does nothing for other types (PetscTryMethod) */
  ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
   (block compressed row).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
          blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
           This value should be the same as the local size used in creating the
           y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
           This value should be the same as the local size used in creating the
           x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz  - number of block nonzeros per block row in diagonal portion of local
           submatrix  (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the upper triangular portion of the diagonal portion of the local matrix
           (possibly different for each block row) or NULL.
           If you plan to factor the matrix you must leave room for the diagonal entry and
           set its value even if it is zero.
.  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
           submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or NULL.

   Output Parameter:
.  A - the matrix

   Options Database Keys:
+  -mat_no_unroll - uses code that does not unroll the loops in the
                    block calculations (much slower)
.  -mat_block_size - size of the blocks to use
-  -mat_mpi - use the parallel matrix data structures even on one processor
              (defaults to using SeqBAIJ format on one processor)

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]

   Notes:
   The number of rows and columns must be divisible by blocksize.
   This matrix type does not support complex Hermitian operation.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   If the *_nnz parameter is given then the *_nz parameter is ignored

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both). Set
   d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
   memory allocation. Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          --------------------------
   row 3  |. . . d d d o o o o o o
   row 4  |. . . d d d o o o o o o
   row 5  |. . . d d d o o o o o o
          --------------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix. Note that the d matrix is stored in
   MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.

   Now d_nz should indicate the number of block nonzeros per row in the upper triangular
   plus the diagonal part of the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz. For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ()
@*/

PetscErrorCode MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    /* single process: use the sequential format (no off-diagonal part) */
    ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
    ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*
   MatDuplicate_MPISBAIJ - deep-copies an assembled MPISBAIJ matrix: layouts are
   referenced, the bookkeeping fields of the context are copied, the work vectors
   and scatters are duplicated, and the local A/B blocks are duplicated per cpvalues.
*/
static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
  PetscErrorCode ierr;
  PetscInt       len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs;
  PetscScalar    *array;

  PetscFunctionBegin;
  *newmat = 0;

  ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
  /* share the layouts with the original rather than rebuilding them */
  ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);

  mat->factortype   = matin->factortype;
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;

  /* copy the block-size bookkeeping of the context */
  a      = (Mat_MPISBAIJ*)mat->data;
  a->bs2 = oldmat->bs2;
  a->mbs = oldmat->mbs;
  a->nbs = oldmat->nbs;
  a->Mbs = oldmat->Mbs;
  a->Nbs = oldmat->Nbs;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = 0;            /* MatGetRow() caches are rebuilt lazily */
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;
  a->barray       = 0;
  a->rstartbs     = oldmat->rstartbs;
  a->rendbs       = oldmat->rendbs;
  a->cstartbs     = oldmat->cstartbs;
  a->cendbs       = oldmat->cendbs;

  /* hash table stuff */
  a->ht           = 0;
  a->hd           = 0;
  a->ht_size      = 0;
  a->ht_flag      = oldmat->ht_flag;
  a->ht_fact      = oldmat->ht_fact;
  a->ht_total_ct  = 0;
  a->ht_insert_ct = 0;

  ierr = PetscArraycpy(a->rangebs,oldmat->rangebs,a->size+2);CHKERRQ(ierr);
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscArraycpy(a->colmap,oldmat->colmap,a->Nbs);CHKERRQ(ierr);
#endif
  } else a->colmap = 0;

  /* garray maps local off-diagonal block columns to global block columns */
  if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
    ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr);
  } else a->garray = 0;

  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);

  ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
  ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);

  /* slvec1a/slvec1b (and slvec0b) alias the leading/trailing pieces of the
     storage of slvec1 (slvec0): they are views, not independent vectors */
  ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
  ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
  ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b);CHKERRQ(ierr);

  /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
  ierr      = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
  a->sMvctx = oldmat->sMvctx;     /* shared, not copied; the reference keeps it alive */
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx);CHKERRQ(ierr);

  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
  ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}

/* Used for both MPIBAIJ and MPISBAIJ matrices */
#define MatLoad_MPISBAIJ_Binary MatLoad_MPIBAIJ_Binary

/* Loads an MPISBAIJ matrix from a viewer; only binary viewers are supported. */
PetscErrorCode MatLoad_MPISBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      isbinary;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)mat)->type_name);
  ierr = MatLoad_MPISBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*XXXXX@
     MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.

   Input Parameters:
.  mat - the matrix
.
fact - factor

   Not Collective on Mat, each process can have a different hash factor

   Level: advanced

   Notes:
   This can also be set by the command line option: -mat_use_hash_table fact

.seealso: MatSetOption()
@XXXXX*/

/*
   MatGetRowMaxAbs_MPISBAIJ - fills v[i] with max_j |A(i,j)| over the global row i,
   for each locally owned (non-blocked) row.

   Only the upper triangle is stored for SBAIJ, so the stored off-diagonal block B
   contributes both to local row maxima and to column maxima owned by higher-ranked
   processes; the column maxima are accumulated in 'work' and shipped point-to-point
   to their owners.

   The idx output (index of the max entry) is not supported and errors out if requested.
*/
PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
  PetscReal      atmp;
  PetscReal      *work,*svalues,*rvalues;
  PetscErrorCode ierr;
  PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
  PetscMPIInt    rank,size;
  PetscInt       *rowners_bs,dest,count,source;
  PetscScalar    *va;
  MatScalar      *ba;
  MPI_Status     stat;

  PetscFunctionBegin;
  if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
  /* diagonal-block contribution to the row maxima */
  ierr = MatGetRowMaxAbs(a->A,v,NULL);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);

  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);

  bs  = A->rmap->bs;
  mbs = a->mbs;
  Mbs = a->Mbs;
  ba  = b->a;
  bi  = b->i;
  bj  = b->j;

  /* find ownerships */
  rowners_bs = A->rmap->range;

  /* each proc creates an array to be distributed; holds column maxima for ALL global columns */
  ierr = PetscCalloc1(bs*Mbs,&work);CHKERRQ(ierr);

  /* row_max for B; the last rank owns no off-diagonal block in the stored upper triangle */
  if (rank != size-1) {
    for (i=0; i<mbs; i++) {
      ncols = bi[1] - bi[0]; bi++;
      brow  = bs*i;
      for (j=0; j<ncols; j++) {
        bcol = bs*(*bj);
        for (kcol=0; kcol<bs; kcol++) {
          col  = bcol + kcol;          /* local col index */
          /* global col index; NOTE(review): offsets by the END of this rank's range —
             assumes B's local columns are numbered from the next rank's first column; confirm */
          col += rowners_bs[rank+1];
          for (krow=0; krow<bs; krow++) {
            atmp = PetscAbsScalar(*ba); ba++;
            row  = brow + krow;        /* local row index */
            if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
            if (work[col] < atmp) work[col] = atmp;
          }
        }
        bj++;
      }
    }

    /* send values to its owners: each higher rank receives the slice of 'work' it owns */
    for (dest=rank+1; dest<size; dest++) {
      svalues = work + rowners_bs[dest];
      count   = rowners_bs[dest+1]-rowners_bs[dest];
      ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
    }
  }

  /* receive values: one message from each lower rank, merged into va by elementwise max */
  if (rank) {
    rvalues = work;
    count   = rowners_bs[rank+1]-rowners_bs[rank];
    for (source=0; source<rank; source++) {
      ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat);CHKERRQ(ierr);
      /* process values */
      for (i=0; i<count; i++) {
        if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
      }
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = PetscFree(work);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatSOR_MPISBAIJ - SOR/SSOR relaxation for MPI SBAIJ matrices (block size 1 only).

   Supported MatSORType values (anything else errors out):
   SOR_APPLY_UPPER, local symmetric sweeps, single local forward/backward sweeps
   with zero initial guess, and SOR_EISENSTAT.

   Uses the split work vectors slvec0/slvec1 (with sub-vector views slvec0b, slvec1a,
   slvec1b) and the scatter sMvctx to combine the B^T and B contributions in one
   communication round per iteration.
*/
PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)matin->data;
  PetscErrorCode    ierr;
  PetscInt          mbs=mat->mbs,bs=matin->rmap->bs;
  PetscScalar       *x,*ptr,*from;
  Vec               bb1;
  const PetscScalar *b;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
  if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");

  if (flag == SOR_APPLY_UPPER) {
    /* delegate entirely to the local diagonal block */
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* first iteration with zero guess needs no off-process contribution */
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
    while (its--) {
      /* lower triangular part: slvec0b = - B^T*xx */
      ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);

      /* copy xx into slvec0a */
      ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
      ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
      ierr = PetscArraycpy(ptr,x,bs*mbs);CHKERRQ(ierr);
      ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);

      /* negates both halves: slvec0a = -xx, slvec0b = +B^T*xx after the multtranspose above */
      ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);

      /* copy bb into slvec1a */
      ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
      ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = PetscArraycpy(ptr,b,bs*mbs);CHKERRQ(ierr);
      ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);

      /* set slvec1b = 0 */
      ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);

      /* one combined scatter delivers both the B^T and B contributions */
      ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
      ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* upper triangular part: bb1 = bb1 - B*x */
      ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);

      /* local diagonal sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
    ierr = VecDestroy(&bb1);CHKERRQ(ierr);
  } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
  } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
  } else if (flag & SOR_EISENSTAT) {
    Vec               xx1;
    PetscBool         hasop;
    const PetscScalar *diag;
    PetscScalar       *sl,scale = (omega - 2.0)/omega;
    PetscInt          i,n;

    /* lazily created work vectors, cached on the matrix */
    if (!mat->xx1) {
      ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr);
      ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr);
    }
    xx1 = mat->xx1;
    bb1 = mat->bb1;

    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);

    if (!mat->diag) {
      /* this is wrong for same matrix with new nonzero values */
      ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
      ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
    }
    ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);

    if (hasop) {
      ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
      ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
    } else {
      /*
          These two lines are replaced by code that may be a bit faster for a good compiler
      ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr);
      ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
      */
      ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr);
      ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr);
      ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
      ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr);
      if (omega == 1.0) {
        /* sl = b - D x */
        for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i];
        ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr);
      } else {
        /* sl = b + ((omega-2)/omega) D x */
        for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i];
        ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr);
      }
      ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr);
      ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr);
      ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
    }

    /* multiply off-diagonal portion of matrix */
    ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
    ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
    ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr);
    ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
    ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr);
    ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr);
    ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
    ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr);

    /* local sweep */
    ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
    ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
  PetscFunctionReturn(0);
}

/*
   MatSOR_MPISBAIJ_2comm - alternative SOR implementation using the plain Mvctx
   scatter (two communication phases per iteration: forward for B*x, reverse for
   B^T*x) instead of the combined sMvctx scatter used by MatSOR_MPISBAIJ.
   Only local symmetric sweeps are supported; block size must be 1.
*/
PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            lvec1,bb1;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
  if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
    while (its--) {
      /* overlap the forward scatter of xx with the local B^T product */
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* lower diagonal part: bb1 = bb - B^T*xx */
      ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
      ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);

      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
      /* reverse scatter accumulates -B^T*xx into the owning processes' bb1 */
      ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

      /* upper diagonal part: bb1 = bb1 - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);

      ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

      /* diagonal sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
    ierr = VecDestroy(&lvec1);CHKERRQ(ierr);
    ierr = VecDestroy(&bb1);CHKERRQ(ierr);
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
  PetscFunctionReturn(0);
}

/*@
   MatCreateMPISBAIJWithArrays - creates a MPI SBAIJ matrix using arrays that contain in standard
         CSR format the local rows.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  bs - the block size, only a block size of 1 is supported
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that row block row of the matrix
.  j - column indices
-  a - matrix values

   Output Parameter:
.
mat - the matrix

   Level: intermediate

   Notes:
   The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
   thus you CANNOT change the matrix entries by changing the values of a[] after you have
   called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

   The i and j indices are 0 based, and i indices are indices corresponding to the local j array.

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* validate the CSR input locally before making any collective calls */
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr);
  /* copies i, j and a into the matrix; see Notes above */
  ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   MatMPISBAIJSetPreallocationCSR - Creates a sparse parallel matrix in SBAIJ format using the given nonzero structure and (optional) numerical values

   Collective

   Input Parameters:
+  B - the matrix
.  bs - the block size
.  i - the indices into j for the start of each local row (starts with zero)
.
j - the column indices for each local row (starts with zero) these must be sorted for each row 2945 - v - optional values in the matrix 2946 2947 Level: advanced 2948 2949 Notes: 2950 Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries 2951 and usually the numerical values as well 2952 2953 Any entries below the diagonal are ignored 2954 2955 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ 2956 @*/ 2957 PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2958 { 2959 PetscErrorCode ierr; 2960 2961 PetscFunctionBegin; 2962 ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 2963 PetscFunctionReturn(0); 2964 } 2965 2966 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 2967 { 2968 PetscErrorCode ierr; 2969 PetscInt m,N,i,rstart,nnz,Ii,bs,cbs; 2970 PetscInt *indx; 2971 PetscScalar *values; 2972 2973 PetscFunctionBegin; 2974 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 2975 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 2976 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inmat->data; 2977 PetscInt *dnz,*onz,mbs,Nbs,nbs; 2978 PetscInt *bindx,rmax=a->rmax,j; 2979 PetscMPIInt rank,size; 2980 2981 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 2982 mbs = m/bs; Nbs = N/cbs; 2983 if (n == PETSC_DECIDE) { 2984 ierr = PetscSplitOwnershipBlock(comm,cbs,&n,&N); 2985 } 2986 nbs = n/cbs; 2987 2988 ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr); 2989 ierr = MatPreallocateInitialize(comm,mbs,nbs,dnz,onz);CHKERRQ(ierr); /* inline function, output __end and __rstart are used below */ 2990 2991 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2992 ierr = MPI_Comm_rank(comm,&size);CHKERRQ(ierr); 2993 if (rank == 
size-1) { 2994 /* Check sum(nbs) = Nbs */ 2995 if (__end != Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local block columns %D != global block columns %D",__end,Nbs); 2996 } 2997 2998 rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateInitialize */ 2999 ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 3000 for (i=0; i<mbs; i++) { 3001 ierr = MatGetRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */ 3002 nnz = nnz/bs; 3003 for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs; 3004 ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr); 3005 ierr = MatRestoreRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); 3006 } 3007 ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr); 3008 ierr = PetscFree(bindx);CHKERRQ(ierr); 3009 3010 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3011 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3012 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 3013 ierr = MatSetType(*outmat,MATSBAIJ);CHKERRQ(ierr); 3014 ierr = MatSeqSBAIJSetPreallocation(*outmat,bs,0,dnz);CHKERRQ(ierr); 3015 ierr = MatMPISBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr); 3016 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3017 } 3018 3019 /* numeric phase */ 3020 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 3021 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 3022 3023 ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 3024 for (i=0; i<m; i++) { 3025 ierr = MatGetRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3026 Ii = i + rstart; 3027 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3028 ierr = MatRestoreRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3029 } 3030 ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr); 3031 ierr = 
MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3032 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3033 PetscFunctionReturn(0); 3034 } 3035