1 2 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/ 3 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 4 #include <../src/mat/impls/sbaij/seq/sbaij.h> 5 #include <petscblaslapack.h> 6 7 #if defined(PETSC_HAVE_ELEMENTAL) 8 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 9 #endif 10 11 /* This could be moved to matimpl.h */ 12 static PetscErrorCode MatPreallocateWithMats_Private(Mat B, PetscInt nm, Mat X[], PetscBool symm[], PetscBool fill) 13 { 14 Mat preallocator; 15 PetscInt r,rstart,rend; 16 PetscInt bs,i,m,n,M,N; 17 PetscBool cong = PETSC_TRUE; 18 PetscErrorCode ierr; 19 20 PetscFunctionBegin; 21 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 22 PetscValidLogicalCollectiveInt(B,nm,2); 23 for (i = 0; i < nm; i++) { 24 PetscValidHeaderSpecific(X[i],MAT_CLASSID,3); 25 ierr = PetscLayoutCompare(B->rmap,X[i]->rmap,&cong);CHKERRQ(ierr); 26 if (!cong) SETERRQ(PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"Not for different layouts"); 27 } 28 PetscValidLogicalCollectiveBool(B,fill,5); 29 ierr = MatGetBlockSize(B,&bs);CHKERRQ(ierr); 30 ierr = MatGetSize(B,&M,&N);CHKERRQ(ierr); 31 ierr = MatGetLocalSize(B,&m,&n);CHKERRQ(ierr); 32 ierr = MatCreate(PetscObjectComm((PetscObject)B),&preallocator);CHKERRQ(ierr); 33 ierr = MatSetType(preallocator,MATPREALLOCATOR);CHKERRQ(ierr); 34 ierr = MatSetBlockSize(preallocator,bs);CHKERRQ(ierr); 35 ierr = MatSetSizes(preallocator,m,n,M,N);CHKERRQ(ierr); 36 ierr = MatSetUp(preallocator);CHKERRQ(ierr); 37 ierr = MatGetOwnershipRange(preallocator,&rstart,&rend);CHKERRQ(ierr); 38 for (r = rstart; r < rend; ++r) { 39 PetscInt ncols; 40 const PetscInt *row; 41 const PetscScalar *vals; 42 43 for (i = 0; i < nm; i++) { 44 ierr = MatGetRow(X[i],r,&ncols,&row,&vals);CHKERRQ(ierr); 45 ierr = MatSetValues(preallocator,1,&r,ncols,row,vals,INSERT_VALUES);CHKERRQ(ierr); 46 if (symm && symm[i]) { 47 ierr = MatSetValues(preallocator,ncols,row,1,&r,vals,INSERT_VALUES);CHKERRQ(ierr); 48 } 49 ierr = MatRestoreRow(X[i],r,&ncols,&row,&vals);CHKERRQ(ierr); 50 } 51 } 52 ierr = MatAssemblyBegin(preallocator,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 53 ierr = MatAssemblyEnd(preallocator,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 54 ierr = MatPreallocatorPreallocate(preallocator,fill,B);CHKERRQ(ierr); 55 ierr = MatDestroy(&preallocator);CHKERRQ(ierr); 56 PetscFunctionReturn(0); 57 } 58 59 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Basic(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 60 { 61 Mat B; 62 PetscErrorCode ierr; 63 PetscInt r; 64 65 PetscFunctionBegin; 66 if (reuse != MAT_REUSE_MATRIX) { 67 PetscBool symm = PETSC_TRUE,isdense; 68 PetscInt bs; 69 70 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 71 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 72 ierr = MatSetType(B,newtype);CHKERRQ(ierr); 73 ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr); 74 ierr = MatSetBlockSize(B,bs);CHKERRQ(ierr); 75 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 76 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 77 ierr = PetscObjectTypeCompareAny((PetscObject)B,&isdense,MATSEQDENSE,MATMPIDENSE,MATSEQDENSECUDA,"");CHKERRQ(ierr); 78 if (!isdense) { 79 ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr); 80 ierr = MatPreallocateWithMats_Private(B,1,&A,&symm,PETSC_TRUE);CHKERRQ(ierr); 81 ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr); 82 } else { 83 ierr = MatSetUp(B);CHKERRQ(ierr); 84 } 85 } else { 86 B = *newmat; 87 ierr = MatZeroEntries(B);CHKERRQ(ierr); 88 } 89 90 ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr); 91 for (r = A->rmap->rstart; r < A->rmap->rend; r++) { 92 PetscInt ncols; 93 const PetscInt *row; 94 const PetscScalar *vals; 95 96 ierr = MatGetRow(A,r,&ncols,&row,&vals);CHKERRQ(ierr); 97 ierr = MatSetValues(B,1,&r,ncols,row,vals,INSERT_VALUES);CHKERRQ(ierr); 98 #if defined(PETSC_USE_COMPLEX) 99 if (A->hermitian) { 100 PetscInt i; 101 for (i = 0; i < ncols; i++) { 102 ierr = MatSetValue(B,row[i],r,PetscConj(vals[i]),INSERT_VALUES);CHKERRQ(ierr); 103 } 104 } else { 105 ierr = MatSetValues(B,ncols,row,1,&r,vals,INSERT_VALUES);CHKERRQ(ierr); 106 } 107 #else 108 ierr = MatSetValues(B,ncols,row,1,&r,vals,INSERT_VALUES);CHKERRQ(ierr); 109 #endif 110 ierr = MatRestoreRow(A,r,&ncols,&row,&vals);CHKERRQ(ierr); 111 } 112 ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr); 113 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 114 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 115 116 if (reuse == MAT_INPLACE_MATRIX) { 117 ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr); 118 } else { 119 *newmat = B; 120 } 121 PetscFunctionReturn(0); 122 } 123 124 PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat) 125 { 126 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ*)mat->data; 127 PetscErrorCode ierr; 128 129 PetscFunctionBegin; 130 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 131 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 132 PetscFunctionReturn(0); 133 } 134 135 PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat) 136 { 137 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ*)mat->data; 138 PetscErrorCode ierr; 139 140 PetscFunctionBegin; 141 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 142 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 143 PetscFunctionReturn(0); 144 } 145 146 #define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,orow,ocol) \ 147 { \ 148 brow = row/bs; \ 149 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 150 rmax = aimax[brow]; nrow = ailen[brow]; \ 151 bcol = col/bs; \ 152 ridx = row % bs; cidx = col % bs; \ 153 low = 0; high = nrow; \ 154 while (high-low > 3) { \ 155 t = (low+high)/2; \ 156 if (rp[t] > bcol) high = t; \ 157 else low = t; \ 158 } \ 159 for (_i=low; _i<high; _i++) { \ 160 if (rp[_i] > bcol) break; \ 161 if (rp[_i] == bcol) { \ 162 bap = ap + bs2*_i + bs*cidx + ridx; \ 163 if (addv == ADD_VALUES) *bap += value; \ 164 else *bap = value; \ 165 goto a_noinsert; \ 166 } \ 167 } \ 168 if (a->nonew == 1) goto a_noinsert; \ 169 if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 170 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 171 N = nrow++ - 1; \ 172 /* shift up all the later entries in this row */ \ 173 ierr = PetscArraymove(rp+_i+1,rp+_i,N-_i+1);CHKERRQ(ierr); \ 174 ierr = PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1));CHKERRQ(ierr); \ 175 ierr = PetscArrayzero(ap+bs2*_i,bs2);CHKERRQ(ierr); \ 176 rp[_i] = bcol; \ 177 ap[bs2*_i + bs*cidx + ridx] = value; \ 178 A->nonzerostate++;\ 179 a_noinsert:; \ 180 ailen[brow] = nrow; \ 181 } 182 183 #define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,orow,ocol) \ 184 { \ 185 brow = row/bs; \ 186 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 187 rmax = bimax[brow]; nrow = bilen[brow]; \ 188 bcol = col/bs; \ 189 ridx = row % bs; cidx = col % bs; \ 190 low = 0; high = nrow; \ 191 while (high-low > 3) { \ 192 t = (low+high)/2; \ 193 if (rp[t] > bcol) high = t; \ 194 else low = t; \ 195 } \ 196 for (_i=low; _i<high; _i++) { \ 197 if (rp[_i] > bcol) break; \ 198 if (rp[_i] == bcol) { \ 199 bap = ap + bs2*_i + bs*cidx + ridx; \ 200 if (addv == ADD_VALUES) *bap += value; \ 201 else *bap = value; \ 202 goto b_noinsert; \ 203 } \ 204 } \ 205 if (b->nonew == 1) goto b_noinsert; \ 206 if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 207 MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 208 N = nrow++ - 1; \ 209 /* shift up all the later entries in this row */ \ 210 ierr = PetscArraymove(rp+_i+1,rp+_i,N-_i+1);CHKERRQ(ierr); \ 211 ierr = PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1));CHKERRQ(ierr); \ 212 ierr = PetscArrayzero(ap+bs2*_i,bs2);CHKERRQ(ierr); \ 213 rp[_i] = bcol; \ 214 ap[bs2*_i + bs*cidx + ridx] = value; \ 215 B->nonzerostate++;\ 216 b_noinsert:; \ 217 bilen[brow] = nrow; \ 218 } 219 220 /* Only add/insert a(i,j) with i<=j (blocks). 221 Any a(i,j) with i>j input by user is ingored or generates an error 222 */ 223 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 224 { 225 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 226 MatScalar value; 227 PetscBool roworiented = baij->roworiented; 228 PetscErrorCode ierr; 229 PetscInt i,j,row,col; 230 PetscInt rstart_orig=mat->rmap->rstart; 231 PetscInt rend_orig =mat->rmap->rend,cstart_orig=mat->cmap->rstart; 232 PetscInt cend_orig =mat->cmap->rend,bs=mat->rmap->bs; 233 234 /* Some Variables required in the macro */ 235 Mat A = baij->A; 236 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data; 237 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 238 MatScalar *aa =a->a; 239 240 Mat B = baij->B; 241 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 242 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 243 MatScalar *ba =b->a; 244 245 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 246 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 247 MatScalar *ap,*bap; 248 249 /* for stash */ 250 PetscInt n_loc, *in_loc = NULL; 251 MatScalar *v_loc = NULL; 252 253 PetscFunctionBegin; 254 if (!baij->donotstash) { 255 if (n > baij->n_loc) { 256 ierr = PetscFree(baij->in_loc);CHKERRQ(ierr); 257 ierr = PetscFree(baij->v_loc);CHKERRQ(ierr); 258 ierr = PetscMalloc1(n,&baij->in_loc);CHKERRQ(ierr); 259 ierr = PetscMalloc1(n,&baij->v_loc);CHKERRQ(ierr); 260 261 baij->n_loc = n; 262 } 263 in_loc = baij->in_loc; 264 v_loc = baij->v_loc; 265 } 266 267 for (i=0; i<m; i++) { 268 if (im[i] < 0) continue; 269 #if defined(PETSC_USE_DEBUG) 270 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 271 #endif 272 if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */ 273 row = im[i] - rstart_orig; /* local row index */ 274 for (j=0; j<n; j++) { 275 if (im[i]/bs > in[j]/bs) { 276 if (a->ignore_ltriangular) { 277 continue; /* ignore lower triangular blocks */ 278 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 279 } 280 if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */ 281 col = in[j] - cstart_orig; /* local col index */ 282 brow = row/bs; bcol = col/bs; 283 if (brow > bcol) continue; /* ignore lower triangular blocks of A */ 284 if (roworiented) value = v[i*n+j]; 285 else value = v[i+j*m]; 286 MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,im[i],in[j]); 287 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 288 } else if (in[j] < 0) continue; 289 #if defined(PETSC_USE_DEBUG) 290 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 291 #endif 292 else { /* off-diag entry (B) */ 293 if (mat->was_assembled) { 294 if (!baij->colmap) { 295 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 296 } 297 #if defined(PETSC_USE_CTABLE) 298 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 299 col = col - 1; 300 #else 301 col = baij->colmap[in[j]/bs] - 1; 302 #endif 303 if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) { 304 ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 305 col = in[j]; 306 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 307 B = baij->B; 308 b = (Mat_SeqBAIJ*)(B)->data; 309 bimax= b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 310 ba = b->a; 311 } else col += in[j]%bs; 312 } else col = in[j]; 313 if (roworiented) value = v[i*n+j]; 314 else value = v[i+j*m]; 315 MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,im[i],in[j]); 316 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 317 } 318 } 319 } else { /* off processor entry */ 320 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 321 if (!baij->donotstash) { 322 mat->assembled = PETSC_FALSE; 323 n_loc = 0; 324 for (j=0; j<n; j++) { 325 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 326 in_loc[n_loc] = in[j]; 327 if (roworiented) { 328 v_loc[n_loc] = v[i*n+j]; 329 } else { 330 v_loc[n_loc] = v[j*m+i]; 331 } 332 n_loc++; 333 } 334 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr); 335 } 336 } 337 } 338 PetscFunctionReturn(0); 339 } 340 341 PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol) 342 { 343 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 344 PetscErrorCode ierr; 345 PetscInt *rp,low,high,t,ii,jj,nrow,i,rmax,N; 346 PetscInt *imax =a->imax,*ai=a->i,*ailen=a->ilen; 347 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs; 348 PetscBool roworiented=a->roworiented; 349 const PetscScalar *value = v; 350 MatScalar *ap,*aa = a->a,*bap; 351 352 PetscFunctionBegin; 353 if (col < row) { 354 if (a->ignore_ltriangular) PetscFunctionReturn(0); /* ignore lower triangular block */ 355 else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 356 } 357 rp = aj + ai[row]; 358 ap = aa + bs2*ai[row]; 359 rmax = imax[row]; 360 nrow = ailen[row]; 361 value = v; 362 low = 0; 363 high = nrow; 364 365 while (high-low > 7) { 366 t = (low+high)/2; 367 if (rp[t] > col) high = t; 368 else low = t; 369 } 370 for (i=low; i<high; i++) { 371 if (rp[i] > col) break; 372 if (rp[i] == col) { 373 bap = ap + bs2*i; 374 if (roworiented) { 375 if (is == ADD_VALUES) { 376 for (ii=0; ii<bs; ii++) { 377 for (jj=ii; jj<bs2; jj+=bs) { 378 bap[jj] += *value++; 379 } 380 } 381 } else { 382 for (ii=0; ii<bs; ii++) { 383 for (jj=ii; jj<bs2; jj+=bs) { 384 bap[jj] = *value++; 385 } 386 } 387 } 388 } else { 389 if (is == ADD_VALUES) { 390 for (ii=0; ii<bs; ii++) { 391 for (jj=0; jj<bs; jj++) { 392 *bap++ += *value++; 393 } 394 } 395 } else { 396 for (ii=0; ii<bs; ii++) { 397 for (jj=0; jj<bs; jj++) { 398 *bap++ = *value++; 399 } 400 } 401 } 402 } 403 goto noinsert2; 404 } 405 } 406 if (nonew == 1) goto noinsert2; 407 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new block index nonzero block (%D, %D) in the matrix", orow, ocol); 408 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 409 N = nrow++ - 1; high++; 410 /* shift up all the later entries in this row */ 411 ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr); 412 ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr); 413 rp[i] = col; 414 bap = ap + bs2*i; 415 if (roworiented) { 416 for (ii=0; ii<bs; ii++) { 417 for (jj=ii; jj<bs2; jj+=bs) { 418 bap[jj] = *value++; 419 } 420 } 421 } else { 422 for (ii=0; ii<bs; ii++) { 423 for (jj=0; jj<bs; jj++) { 424 *bap++ = *value++; 425 } 426 } 427 } 428 noinsert2:; 429 ailen[row] = nrow; 430 PetscFunctionReturn(0); 431 } 432 433 /* 434 This routine is exactly duplicated in mpibaij.c 435 */ 436 PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol) 437 { 438 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 439 PetscInt *rp,low,high,t,ii,jj,nrow,i,rmax,N; 440 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 441 PetscErrorCode ierr; 442 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs; 443 PetscBool roworiented=a->roworiented; 444 const PetscScalar *value = v; 445 MatScalar *ap,*aa = a->a,*bap; 446 447 PetscFunctionBegin; 448 rp = aj + ai[row]; 449 ap = aa + bs2*ai[row]; 450 rmax = imax[row]; 451 nrow = ailen[row]; 452 low = 0; 453 high = nrow; 454 value = v; 455 while (high-low > 7) { 456 t = (low+high)/2; 457 if (rp[t] > col) high = t; 458 else low = t; 459 } 460 for (i=low; i<high; i++) { 461 if (rp[i] > col) break; 462 if (rp[i] == col) { 463 bap = ap + bs2*i; 464 if (roworiented) { 465 if (is == ADD_VALUES) { 466 for (ii=0; ii<bs; ii++) { 467 for (jj=ii; jj<bs2; jj+=bs) { 468 bap[jj] += *value++; 469 } 470 } 471 } else { 472 for (ii=0; ii<bs; ii++) { 473 for (jj=ii; jj<bs2; jj+=bs) { 474 bap[jj] = *value++; 475 } 476 } 477 } 478 } else { 479 if (is == ADD_VALUES) { 480 for (ii=0; ii<bs; ii++,value+=bs) { 481 for (jj=0; jj<bs; jj++) { 482 bap[jj] += value[jj]; 483 } 484 bap += bs; 485 } 486 } else { 487 for (ii=0; ii<bs; ii++,value+=bs) { 488 for (jj=0; jj<bs; jj++) { 489 bap[jj] = value[jj]; 490 } 491 bap += bs; 492 } 493 } 494 } 495 goto noinsert2; 496 } 497 } 498 if (nonew == 1) goto noinsert2; 499 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%D, %D) in the matrix", orow, ocol); 500 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 501 N = nrow++ - 1; high++; 502 /* shift up all the later entries in this row */ 503 ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr); 504 ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr); 505 rp[i] = col; 506 bap = ap + bs2*i; 507 if (roworiented) { 508 for (ii=0; ii<bs; ii++) { 509 for (jj=ii; jj<bs2; jj+=bs) { 510 bap[jj] = *value++; 511 } 512 } 513 } else { 514 for (ii=0; ii<bs; ii++) { 515 for (jj=0; jj<bs; jj++) { 516 *bap++ = *value++; 517 } 518 } 519 } 520 noinsert2:; 521 ailen[row] = nrow; 522 PetscFunctionReturn(0); 523 } 524 525 /* 526 This routine could be optimized by removing the need for the block copy below and passing stride information 527 to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ() 528 */ 529 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 530 { 531 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 532 const MatScalar *value; 533 MatScalar *barray =baij->barray; 534 PetscBool roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular; 535 PetscErrorCode ierr; 536 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 537 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 538 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 539 540 PetscFunctionBegin; 541 if (!barray) { 542 ierr = PetscMalloc1(bs2,&barray);CHKERRQ(ierr); 543 baij->barray = barray; 544 } 545 546 if (roworiented) { 547 stepval = (n-1)*bs; 548 } else { 549 stepval = (m-1)*bs; 550 } 551 for (i=0; i<m; i++) { 552 if (im[i] < 0) continue; 553 #if defined(PETSC_USE_DEBUG) 554 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %D max %D",im[i],baij->Mbs-1); 555 #endif 556 if (im[i] >= rstart && im[i] < rend) { 557 row = im[i] - rstart; 558 for (j=0; j<n; j++) { 559 if (im[i] > in[j]) { 560 if (ignore_ltriangular) continue; /* ignore lower triangular blocks */ 561 else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 562 } 563 /* If NumCol = 1 then a copy is not required */ 564 if ((roworiented) && (n == 1)) { 565 barray = (MatScalar*) v + i*bs2; 566 } else if ((!roworiented) && (m == 1)) { 567 barray = (MatScalar*) v + j*bs2; 568 } else { /* Here a copy is required */ 569 if (roworiented) { 570 value = v + i*(stepval+bs)*bs + j*bs; 571 } else { 572 value = v + j*(stepval+bs)*bs + i*bs; 573 } 574 for (ii=0; ii<bs; ii++,value+=stepval) { 575 for (jj=0; jj<bs; jj++) { 576 *barray++ = *value++; 577 } 578 } 579 barray -=bs2; 580 } 581 582 if (in[j] >= cstart && in[j] < cend) { 583 col = in[j] - cstart; 584 ierr = MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr); 585 } else if (in[j] < 0) continue; 586 #if defined(PETSC_USE_DEBUG) 587 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %D max %D",in[j],baij->Nbs-1); 588 #endif 589 else { 590 if (mat->was_assembled) { 591 if (!baij->colmap) { 592 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 593 } 594 595 #if defined(PETSC_USE_DEBUG) 596 #if defined(PETSC_USE_CTABLE) 597 { PetscInt data; 598 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 599 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 600 } 601 #else 602 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 603 #endif 604 #endif 605 #if defined(PETSC_USE_CTABLE) 606 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 607 col = (col - 1)/bs; 608 #else 609 col = (baij->colmap[in[j]] - 1)/bs; 610 #endif 611 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 612 ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 613 col = in[j]; 614 } 615 } else col = in[j]; 616 ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr); 617 } 618 } 619 } else { 620 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 621 if (!baij->donotstash) { 622 if (roworiented) { 623 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 624 } else { 625 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 626 } 627 } 628 } 629 } 630 PetscFunctionReturn(0); 631 } 632 633 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 634 { 635 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 636 PetscErrorCode ierr; 637 PetscInt bs = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend; 638 PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data; 639 640 PetscFunctionBegin; 641 for (i=0; i<m; i++) { 642 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */ 643 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 644 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 645 row = idxm[i] - bsrstart; 646 for (j=0; j<n; j++) { 647 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */ 648 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 649 if (idxn[j] >= bscstart && idxn[j] < bscend) { 650 col = idxn[j] - bscstart; 651 ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 652 } else { 653 if (!baij->colmap) { 654 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 655 } 656 #if defined(PETSC_USE_CTABLE) 657 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 658 data--; 659 #else 660 data = baij->colmap[idxn[j]/bs]-1; 661 #endif 662 if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 663 else { 664 col = data + idxn[j]%bs; 665 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 666 } 667 } 668 } 669 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 670 } 671 PetscFunctionReturn(0); 672 } 673 674 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm) 675 { 676 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 677 PetscErrorCode ierr; 678 PetscReal sum[2],*lnorm2; 679 680 PetscFunctionBegin; 681 if (baij->size == 1) { 682 ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr); 683 } else { 684 if (type == NORM_FROBENIUS) { 685 ierr = PetscMalloc1(2,&lnorm2);CHKERRQ(ierr); 686 ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr); 687 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++; /* squar power of norm(A) */ 688 ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr); 689 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--; /* squar power of norm(B) */ 690 ierr = MPIU_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 691 *norm = PetscSqrtReal(sum[0] + 2*sum[1]); 692 ierr = PetscFree(lnorm2);CHKERRQ(ierr); 693 } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */ 694 Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data; 695 Mat_SeqBAIJ *bmat=(Mat_SeqBAIJ*)baij->B->data; 696 PetscReal *rsum,*rsum2,vabs; 697 PetscInt *jj,*garray=baij->garray,rstart=baij->rstartbs,nz; 698 PetscInt brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs; 699 MatScalar *v; 700 701 ierr = PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2);CHKERRQ(ierr); 702 ierr = PetscArrayzero(rsum,mat->cmap->N);CHKERRQ(ierr); 703 /* Amat */ 704 v = amat->a; jj = amat->j; 705 for (brow=0; brow<mbs; brow++) { 706 grow = bs*(rstart + brow); 707 nz = amat->i[brow+1] - amat->i[brow]; 708 for (bcol=0; bcol<nz; bcol++) { 709 gcol = bs*(rstart + *jj); jj++; 710 for (col=0; col<bs; col++) { 711 for (row=0; row<bs; row++) { 712 vabs = PetscAbsScalar(*v); v++; 713 rsum[gcol+col] += vabs; 714 /* non-diagonal block */ 715 if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs; 716 } 717 } 718 } 719 ierr = PetscLogFlops(nz*bs*bs);CHKERRQ(ierr); 720 } 721 /* Bmat */ 722 v = bmat->a; jj = bmat->j; 723 for (brow=0; brow<mbs; brow++) { 724 grow = bs*(rstart + brow); 725 nz = bmat->i[brow+1] - bmat->i[brow]; 726 for (bcol=0; bcol<nz; bcol++) { 727 gcol = bs*garray[*jj]; jj++; 728 for (col=0; col<bs; col++) { 729 for (row=0; row<bs; row++) { 730 vabs = PetscAbsScalar(*v); v++; 731 rsum[gcol+col] += vabs; 732 rsum[grow+row] += vabs; 733 } 734 } 735 } 736 ierr = PetscLogFlops(nz*bs*bs);CHKERRQ(ierr); 737 } 738 ierr = MPIU_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 739 *norm = 0.0; 740 for (col=0; col<mat->cmap->N; col++) { 741 if (rsum2[col] > *norm) *norm = rsum2[col]; 742 } 743 ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr); 744 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet"); 745 } 746 PetscFunctionReturn(0); 747 } 748 749 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode) 750 { 751 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 752 PetscErrorCode ierr; 753 PetscInt nstash,reallocs; 754 755 PetscFunctionBegin; 756 if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 757 758 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 759 ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr); 760 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 761 ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 762 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 763 ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 764 PetscFunctionReturn(0); 765 } 766 767 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode) 768 { 769 Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data; 770 Mat_SeqSBAIJ *a =(Mat_SeqSBAIJ*)baij->A->data; 771 PetscErrorCode ierr; 772 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 773 PetscInt *row,*col; 774 PetscBool other_disassembled; 775 PetscMPIInt n; 776 PetscBool r1,r2,r3; 777 MatScalar *val; 778 779 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 780 PetscFunctionBegin; 781 if (!baij->donotstash && !mat->nooffprocentries) { 782 while (1) { 783 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 784 if (!flg) break; 785 786 for (i=0; i<n;) { 787 /* Now identify the consecutive vals belonging to the same row */ 788 for (j=i,rstart=row[j]; j<n; j++) { 789 if (row[j] != rstart) break; 790 } 791 if (j < n) ncols = j-i; 792 else ncols = n-i; 793 /* Now assemble all these values with a single function call */ 794 ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 795 i = j; 796 } 797 } 798 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 799 /* Now process the block-stash. Since the values are stashed column-oriented, 800 set the roworiented flag to column oriented, and after MatSetValues() 801 restore the original flags */ 802 r1 = baij->roworiented; 803 r2 = a->roworiented; 804 r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented; 805 806 baij->roworiented = PETSC_FALSE; 807 a->roworiented = PETSC_FALSE; 808 809 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworinted */ 810 while (1) { 811 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 812 if (!flg) break; 813 814 for (i=0; i<n;) { 815 /* Now identify the consecutive vals belonging to the same row */ 816 for (j=i,rstart=row[j]; j<n; j++) { 817 if (row[j] != rstart) break; 818 } 819 if (j < n) ncols = j-i; 820 else ncols = n-i; 821 ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode);CHKERRQ(ierr); 822 i = j; 823 } 824 } 825 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 826 827 baij->roworiented = r1; 828 a->roworiented = r2; 829 830 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworinted */ 831 } 832 833 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 834 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 835 836 /* determine if any processor has disassembled, if so we must 837 also disassemble ourselfs, in order that we may reassemble. */ 838 /* 839 if nonzero structure of submatrix B cannot change then we know that 840 no processor disassembled thus we can skip this stuff 841 */ 842 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 843 ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 844 if (mat->was_assembled && !other_disassembled) { 845 ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 846 } 847 } 848 849 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 850 ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */ 851 } 852 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 853 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 854 855 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 856 857 baij->rowvalues = 0; 858 859 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 860 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 861 PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate; 862 ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 863 } 864 PetscFunctionReturn(0); 865 } 866 867 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 868 #include <petscdraw.h> 869 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 870 { 871 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 872 PetscErrorCode ierr; 873 PetscInt bs = mat->rmap->bs; 874 PetscMPIInt rank = baij->rank; 875 PetscBool iascii,isdraw; 876 PetscViewer sviewer; 877 PetscViewerFormat format; 878 879 PetscFunctionBegin; 880 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 881 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 882 if (iascii) { 883 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 884 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 885 MatInfo info; 886 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 887 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 888 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 889 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %g\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(double)info.memory);CHKERRQ(ierr); 890 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 891 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 892 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 893 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 894 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 895 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 896 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 897 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 898 PetscFunctionReturn(0); 899 } else if (format == PETSC_VIEWER_ASCII_INFO) { 900 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 901 PetscFunctionReturn(0); 902 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 903 PetscFunctionReturn(0); 904 } 905 } 906 907 if (isdraw) { 908 PetscDraw draw; 909 PetscBool isnull; 910 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 911 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 912 if (isnull) PetscFunctionReturn(0); 913 } 914 915 { 916 /* assemble the entire matrix onto first processor. */ 917 Mat A; 918 Mat_SeqSBAIJ *Aloc; 919 Mat_SeqBAIJ *Bloc; 920 PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 921 MatScalar *a; 922 const char *matname; 923 924 /* Should this be the same type as mat? */ 925 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 926 if (!rank) { 927 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 928 } else { 929 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 930 } 931 ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 932 ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr); 933 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 934 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 935 936 /* copy over the A part */ 937 Aloc = (Mat_SeqSBAIJ*)baij->A->data; 938 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 939 ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr); 940 941 for (i=0; i<mbs; i++) { 942 rvals[0] = bs*(baij->rstartbs + i); 943 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 944 for (j=ai[i]; j<ai[i+1]; j++) { 945 col = (baij->cstartbs+aj[j])*bs; 946 for (k=0; k<bs; k++) { 947 ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 948 col++; 949 a += bs; 950 } 951 } 952 } 953 /* copy over the B part */ 954 Bloc = (Mat_SeqBAIJ*)baij->B->data; 955 ai = Bloc->i; aj = Bloc->j; a = Bloc->a; 956 for (i=0; i<mbs; i++) { 957 958 rvals[0] = bs*(baij->rstartbs + i); 959 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 960 for (j=ai[i]; j<ai[i+1]; j++) { 961 col = baij->garray[aj[j]]*bs; 962 for (k=0; k<bs; k++) { 963 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 964 col++; 965 a += bs; 966 } 967 } 968 } 969 ierr = PetscFree(rvals);CHKERRQ(ierr); 970 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 971 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 972 /* 973 Everyone has to call to draw the matrix since the graphics waits are 974 synchronized across all processors that share the PetscDraw object 975 */ 976 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 977 ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr); 978 if (!rank) { 979 ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,matname);CHKERRQ(ierr); 980 ierr = MatView_SeqSBAIJ(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 981 } 982 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 983 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 984 ierr = MatDestroy(&A);CHKERRQ(ierr); 985 } 986 PetscFunctionReturn(0); 987 } 988 989 static PetscErrorCode MatView_MPISBAIJ_Binary(Mat mat,PetscViewer viewer) 990 { 991 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)mat->data; 992 Mat_SeqSBAIJ *A = (Mat_SeqSBAIJ*)a->A->data; 993 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)a->B->data; 994 PetscErrorCode ierr; 995 PetscInt i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen; 996 PetscInt *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll; 997 int fd; 998 PetscScalar *column_values; 999 FILE *file; 1000 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1001 PetscInt message_count,flowcontrolcount; 1002 1003 PetscFunctionBegin; 1004 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1005 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 1006 nz = bs2*(A->nz + B->nz); 1007 rlen = mat->rmap->n; 1008 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1009 if (!rank) { 1010 header[0] = MAT_FILE_CLASSID; 1011 header[1] = mat->rmap->N; 1012 header[2] = mat->cmap->N; 1013 1014 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1015 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1016 /* get largest number of rows any processor has */ 1017 range = mat->rmap->range; 1018 for (i=1; i<size; i++) { 1019 rlen = PetscMax(rlen,range[i+1] - range[i]); 1020 } 1021 } else { 1022 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1023 } 1024 1025 ierr = PetscMalloc1(rlen/bs,&crow_lens);CHKERRQ(ierr); 1026 /* compute lengths of each row */ 1027 for (i=0; i<a->mbs; i++) { 1028 crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1029 } 1030 /* store the row lengths to the file */ 1031 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1032 if (!rank) { 1033 MPI_Status status; 1034 ierr = PetscMalloc1(rlen,&row_lens);CHKERRQ(ierr); 1035 rlen = (range[1] - range[0])/bs; 1036 for (i=0; i<rlen; i++) { 1037 for (j=0; j<bs; j++) { 1038 row_lens[i*bs+j] = bs*crow_lens[i]; 1039 } 1040 } 1041 ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1042 for (i=1; i<size; i++) { 1043 rlen = (range[i+1] - range[i])/bs; 1044 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1045 ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1046 for (k=0; k<rlen; k++) { 1047 for (j=0; j<bs; j++) { 1048 row_lens[k*bs+j] = bs*crow_lens[k]; 1049 } 1050 } 1051 ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1052 } 1053 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1054 ierr = PetscFree(row_lens);CHKERRQ(ierr); 1055 } else { 1056 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1057 ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1058 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1059 } 1060 ierr = PetscFree(crow_lens);CHKERRQ(ierr); 1061 1062 /* load up the local column indices. Include for all rows not just one for each block row since process 0 does not have the 1063 information needed to make it for each row from a block row. This does require more communication but still not more than 1064 the communication needed for the nonzero values */ 1065 nzmax = nz; /* space a largest processor needs */ 1066 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1067 ierr = PetscMalloc1(nzmax,&column_indices);CHKERRQ(ierr); 1068 cnt = 0; 1069 for (i=0; i<a->mbs; i++) { 1070 pcnt = cnt; 1071 for (j=B->i[i]; j<B->i[i+1]; j++) { 1072 if ((col = garray[B->j[j]]) > cstart) break; 1073 for (l=0; l<bs; l++) { 1074 column_indices[cnt++] = bs*col+l; 1075 } 1076 } 1077 for (k=A->i[i]; k<A->i[i+1]; k++) { 1078 for (l=0; l<bs; l++) { 1079 column_indices[cnt++] = bs*(A->j[k] + cstart)+l; 1080 } 1081 } 1082 for (; j<B->i[i+1]; j++) { 1083 for (l=0; l<bs; l++) { 1084 column_indices[cnt++] = bs*garray[B->j[j]]+l; 1085 } 1086 } 1087 len = cnt - pcnt; 1088 for (k=1; k<bs; k++) { 1089 ierr = PetscArraycpy(&column_indices[cnt],&column_indices[pcnt],len);CHKERRQ(ierr); 1090 cnt += len; 1091 } 1092 } 1093 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1094 1095 /* store the columns to the file */ 1096 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1097 if (!rank) { 1098 MPI_Status status; 1099 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1100 for (i=1; i<size; i++) { 1101 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1102 ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1103 ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1104 ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1105 } 1106 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1107 } else { 1108 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1109 ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1110 ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1111 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1112 } 1113 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1114 1115 /* load up the numerical values */ 1116 ierr = PetscMalloc1(nzmax,&column_values);CHKERRQ(ierr); 1117 cnt = 0; 1118 for (i=0; i<a->mbs; i++) { 1119 rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]); 1120 for (j=B->i[i]; j<B->i[i+1]; j++) { 1121 if (garray[B->j[j]] > cstart) break; 1122 for (l=0; l<bs; l++) { 1123 for (ll=0; ll<bs; ll++) { 1124 column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll]; 1125 } 1126 } 1127 cnt += bs; 1128 } 1129 for (k=A->i[i]; k<A->i[i+1]; k++) { 1130 for (l=0; l<bs; l++) { 1131 for (ll=0; ll<bs; ll++) { 1132 column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll]; 1133 } 1134 } 1135 cnt += bs; 1136 } 1137 for (; j<B->i[i+1]; j++) { 1138 for (l=0; l<bs; l++) { 1139 for (ll=0; ll<bs; ll++) { 1140 column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll]; 1141 } 1142 } 1143 cnt += bs; 1144 } 1145 cnt += (bs-1)*rlen; 1146 } 1147 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1148 1149 /* store the column values to the file */ 1150 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1151 if (!rank) { 1152 MPI_Status status; 1153 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1154 for (i=1; i<size; i++) { 1155 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1156 ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1157 ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1158 ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1159 } 1160 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1161 } else { 1162 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1163 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1164 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1165 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1166 } 1167 ierr = PetscFree(column_values);CHKERRQ(ierr); 1168 1169 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1170 if (file) { 1171 fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs); 1172 } 1173 PetscFunctionReturn(0); 1174 } 1175 1176 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer) 1177 { 1178 PetscErrorCode ierr; 1179 PetscBool iascii,isdraw,issocket,isbinary; 1180 1181 PetscFunctionBegin; 1182 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1183 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1184 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1185 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1186 if (iascii || isdraw || issocket) { 1187 ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1188 } else if (isbinary) { 1189 ierr = MatView_MPISBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1190 } 1191 PetscFunctionReturn(0); 1192 } 1193 1194 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat) 1195 { 1196 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1197 PetscErrorCode ierr; 1198 1199 PetscFunctionBegin; 1200 #if defined(PETSC_USE_LOG) 1201 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N); 1202 #endif 1203 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1204 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 1205 ierr = MatDestroy(&baij->A);CHKERRQ(ierr); 1206 ierr = MatDestroy(&baij->B);CHKERRQ(ierr); 1207 #if defined(PETSC_USE_CTABLE) 1208 ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr); 1209 #else 1210 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 1211 #endif 1212 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 1213 ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr); 1214 ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr); 1215 ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr); 1216 ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr); 1217 ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr); 1218 ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr); 1219 ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr); 1220 ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr); 1221 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 1222 ierr = PetscFree(baij->barray);CHKERRQ(ierr); 1223 ierr = PetscFree(baij->hd);CHKERRQ(ierr); 1224 ierr = VecDestroy(&baij->diag);CHKERRQ(ierr); 1225 ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr); 1226 ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr); 1227 #if defined(PETSC_USE_REAL_MAT_SINGLE) 1228 ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr); 1229 #endif 1230 ierr = PetscFree(baij->in_loc);CHKERRQ(ierr); 1231 ierr = PetscFree(baij->v_loc);CHKERRQ(ierr); 1232 ierr = PetscFree(baij->rangebs);CHKERRQ(ierr); 1233 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1234 1235 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1236 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1237 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1238 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1239 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1240 #if defined(PETSC_HAVE_ELEMENTAL) 1241 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_elemental_C",NULL);CHKERRQ(ierr); 1242 #endif 1243 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpiaij_C",NULL);CHKERRQ(ierr); 1244 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpibaij_C",NULL);CHKERRQ(ierr); 1245 PetscFunctionReturn(0); 1246 } 1247 1248 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy) 1249 { 1250 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1251 PetscErrorCode ierr; 1252 PetscInt mbs=a->mbs,bs=A->rmap->bs; 1253 PetscScalar *from; 1254 const PetscScalar *x; 1255 1256 PetscFunctionBegin; 1257 /* diagonal part */ 1258 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 1259 ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr); 1260 1261 /* subdiagonal part */ 1262 ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 1263 1264 /* copy x into the vec slvec0 */ 1265 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 1266 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1267 1268 ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr); 1269 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 1270 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1271 1272 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1273 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1274 /* supperdiagonal part */ 1275 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 1276 PetscFunctionReturn(0); 1277 } 1278 1279 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy) 1280 { 1281 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1282 PetscErrorCode ierr; 1283 PetscInt mbs=a->mbs,bs=A->rmap->bs; 1284 PetscScalar *from; 1285 const PetscScalar *x; 1286 1287 PetscFunctionBegin; 1288 /* diagonal part */ 1289 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 1290 ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr); 1291 1292 /* subdiagonal part */ 1293 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 1294 1295 /* copy x into the vec slvec0 */ 1296 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 1297 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1298 1299 ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr); 1300 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 1301 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1302 1303 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1304 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1305 /* supperdiagonal part */ 1306 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 1307 PetscFunctionReturn(0); 1308 } 1309 1310 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy) 1311 { 1312 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1313 PetscErrorCode ierr; 1314 PetscInt nt; 1315 1316 PetscFunctionBegin; 1317 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1318 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1319 1320 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1321 if (nt != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 1322 1323 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1324 /* do diagonal part */ 1325 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1326 /* do supperdiagonal part */ 1327 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1328 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1329 /* do subdiagonal part */ 1330 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1331 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1332 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1333 PetscFunctionReturn(0); 1334 } 1335 1336 PetscErrorCode MatMultAdd_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy,Vec zz) 1337 { 1338 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1339 PetscErrorCode ierr; 1340 PetscInt mbs=a->mbs,bs=A->rmap->bs; 1341 PetscScalar *from,zero=0.0; 1342 const PetscScalar *x; 1343 1344 PetscFunctionBegin; 1345 /* diagonal part */ 1346 ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr); 1347 ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr); 1348 1349 /* subdiagonal part */ 1350 ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 1351 1352 /* copy x into the vec slvec0 */ 1353 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 1354 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1355 ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr); 1356 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 1357 1358 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1359 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1360 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1361 1362 /* supperdiagonal part */ 1363 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr); 1364 PetscFunctionReturn(0); 1365 } 1366 1367 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1368 { 1369 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1370 PetscErrorCode ierr; 1371 PetscInt mbs=a->mbs,bs=A->rmap->bs; 1372 PetscScalar *from,zero=0.0; 1373 const PetscScalar *x; 1374 1375 PetscFunctionBegin; 1376 /* diagonal part */ 1377 ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr); 1378 ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr); 1379 1380 /* subdiagonal part */ 1381 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 1382 1383 /* copy x into the vec slvec0 */ 1384 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 1385 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1386 ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr); 1387 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 1388 1389 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1390 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1391 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1392 1393 /* supperdiagonal part */ 1394 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr); 1395 PetscFunctionReturn(0); 1396 } 1397 1398 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz) 1399 { 1400 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1401 PetscErrorCode ierr; 1402 1403 PetscFunctionBegin; 1404 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1405 /* do diagonal part */ 1406 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1407 /* do supperdiagonal part */ 1408 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1409 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1410 1411 /* do subdiagonal part */ 1412 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1413 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1414 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1415 PetscFunctionReturn(0); 1416 } 1417 1418 /* 1419 This only works correctly for square matrices where the subblock A->A is the 1420 diagonal block 1421 */ 1422 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v) 1423 { 1424 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1425 PetscErrorCode ierr; 1426 1427 PetscFunctionBegin; 1428 /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */ 1429 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1430 PetscFunctionReturn(0); 1431 } 1432 1433 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa) 1434 { 1435 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1436 PetscErrorCode ierr; 1437 1438 PetscFunctionBegin; 1439 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1440 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1441 PetscFunctionReturn(0); 1442 } 1443 1444 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1445 { 1446 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 1447 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1448 PetscErrorCode ierr; 1449 PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1450 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend; 1451 PetscInt *cmap,*idx_p,cstart = mat->rstartbs; 1452 1453 PetscFunctionBegin; 1454 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1455 mat->getrowactive = PETSC_TRUE; 1456 1457 if (!mat->rowvalues && (idx || v)) { 1458 /* 1459 allocate enough space to hold information from the longest row. 1460 */ 1461 Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data; 1462 Mat_SeqBAIJ *Ba = (Mat_SeqBAIJ*)mat->B->data; 1463 PetscInt max = 1,mbs = mat->mbs,tmp; 1464 for (i=0; i<mbs; i++) { 1465 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */ 1466 if (max < tmp) max = tmp; 1467 } 1468 ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr); 1469 } 1470 1471 if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows"); 1472 lrow = row - brstart; /* local row index */ 1473 1474 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1475 if (!v) {pvA = 0; pvB = 0;} 1476 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1477 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1478 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1479 nztot = nzA + nzB; 1480 1481 cmap = mat->garray; 1482 if (v || idx) { 1483 if (nztot) { 1484 /* Sort by increasing column numbers, assuming A and B already sorted */ 1485 PetscInt imark = -1; 1486 if (v) { 1487 *v = v_p = mat->rowvalues; 1488 for (i=0; i<nzB; i++) { 1489 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1490 else break; 1491 } 1492 imark = i; 1493 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1494 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1495 } 1496 if (idx) { 1497 *idx = idx_p = mat->rowindices; 1498 if (imark > -1) { 1499 for (i=0; i<imark; i++) { 1500 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1501 } 1502 } else { 1503 for (i=0; i<nzB; i++) { 1504 if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1505 else break; 1506 } 1507 imark = i; 1508 } 1509 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1510 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1511 } 1512 } else { 1513 if (idx) *idx = 0; 1514 if (v) *v = 0; 1515 } 1516 } 1517 *nz = nztot; 1518 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1519 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1520 PetscFunctionReturn(0); 1521 } 1522 1523 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1524 { 1525 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1526 1527 PetscFunctionBegin; 1528 if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1529 baij->getrowactive = PETSC_FALSE; 1530 PetscFunctionReturn(0); 1531 } 1532 1533 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A) 1534 { 1535 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1536 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1537 1538 PetscFunctionBegin; 1539 aA->getrow_utriangular = PETSC_TRUE; 1540 PetscFunctionReturn(0); 1541 } 1542 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A) 1543 { 1544 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1545 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1546 1547 PetscFunctionBegin; 1548 aA->getrow_utriangular = PETSC_FALSE; 1549 PetscFunctionReturn(0); 1550 } 1551 1552 PetscErrorCode MatRealPart_MPISBAIJ(Mat A) 1553 { 1554 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1555 PetscErrorCode ierr; 1556 1557 PetscFunctionBegin; 1558 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1559 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1560 PetscFunctionReturn(0); 1561 } 1562 1563 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A) 1564 { 1565 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1566 PetscErrorCode ierr; 1567 1568 PetscFunctionBegin; 1569 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1570 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1571 PetscFunctionReturn(0); 1572 } 1573 1574 /* Check if isrow is a subset of iscol_local, called by MatCreateSubMatrix_MPISBAIJ() 1575 Input: isrow - distributed(parallel), 1576 iscol_local - locally owned (seq) 1577 */ 1578 PetscErrorCode ISEqual_private(IS isrow,IS iscol_local,PetscBool *flg) 1579 { 1580 PetscErrorCode ierr; 1581 PetscInt sz1,sz2,*a1,*a2,i,j,k,nmatch; 1582 const PetscInt *ptr1,*ptr2; 1583 1584 PetscFunctionBegin; 1585 ierr = ISGetLocalSize(isrow,&sz1);CHKERRQ(ierr); 1586 ierr = ISGetLocalSize(iscol_local,&sz2);CHKERRQ(ierr); 1587 if (sz1 > sz2) { 1588 *flg = PETSC_FALSE; 1589 PetscFunctionReturn(0); 1590 } 1591 1592 ierr = ISGetIndices(isrow,&ptr1);CHKERRQ(ierr); 1593 ierr = ISGetIndices(iscol_local,&ptr2);CHKERRQ(ierr); 1594 1595 ierr = PetscMalloc1(sz1,&a1);CHKERRQ(ierr); 1596 ierr = PetscMalloc1(sz2,&a2);CHKERRQ(ierr); 1597 ierr = PetscArraycpy(a1,ptr1,sz1);CHKERRQ(ierr); 1598 ierr = PetscArraycpy(a2,ptr2,sz2);CHKERRQ(ierr); 1599 ierr = PetscSortInt(sz1,a1);CHKERRQ(ierr); 1600 ierr = PetscSortInt(sz2,a2);CHKERRQ(ierr); 1601 1602 nmatch=0; 1603 k = 0; 1604 for (i=0; i<sz1; i++){ 1605 for (j=k; j<sz2; j++){ 1606 if (a1[i] == a2[j]) { 1607 k = j; nmatch++; 1608 break; 1609 } 1610 } 1611 } 1612 ierr = ISRestoreIndices(isrow,&ptr1);CHKERRQ(ierr); 1613 ierr = ISRestoreIndices(iscol_local,&ptr2);CHKERRQ(ierr); 1614 ierr = PetscFree(a1);CHKERRQ(ierr); 1615 ierr = PetscFree(a2);CHKERRQ(ierr); 1616 if (nmatch < sz1) { 1617 *flg = PETSC_FALSE; 1618 } else { 1619 *flg = PETSC_TRUE; 1620 } 1621 PetscFunctionReturn(0); 1622 } 1623 1624 PetscErrorCode MatCreateSubMatrix_MPISBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 1625 { 1626 PetscErrorCode ierr; 1627 IS iscol_local; 1628 PetscInt csize; 1629 PetscBool isequal; 1630 1631 PetscFunctionBegin; 1632 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1633 if (call == MAT_REUSE_MATRIX) { 1634 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 1635 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 1636 } else { 1637 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 1638 ierr = ISEqual_private(isrow,iscol_local,&isequal);CHKERRQ(ierr); 1639 if (!isequal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isrow"); 1640 } 1641 1642 /* now call MatCreateSubMatrix_MPIBAIJ() */ 1643 ierr = MatCreateSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 1644 if (call == MAT_INITIAL_MATRIX) { 1645 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 1646 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 1647 } 1648 PetscFunctionReturn(0); 1649 } 1650 1651 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A) 1652 { 1653 Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data; 1654 PetscErrorCode ierr; 1655 1656 PetscFunctionBegin; 1657 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1658 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1659 PetscFunctionReturn(0); 1660 } 1661 1662 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1663 { 1664 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data; 1665 Mat A = a->A,B = a->B; 1666 PetscErrorCode ierr; 1667 PetscLogDouble isend[5],irecv[5]; 1668 1669 PetscFunctionBegin; 1670 info->block_size = (PetscReal)matin->rmap->bs; 1671 1672 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1673 1674 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1675 isend[3] = info->memory; isend[4] = info->mallocs; 1676 1677 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1678 1679 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1680 isend[3] += info->memory; isend[4] += info->mallocs; 1681 if (flag == MAT_LOCAL) { 1682 info->nz_used = isend[0]; 1683 info->nz_allocated = isend[1]; 1684 info->nz_unneeded = isend[2]; 1685 info->memory = isend[3]; 1686 info->mallocs = isend[4]; 1687 } else if (flag == MAT_GLOBAL_MAX) { 1688 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1689 1690 info->nz_used = irecv[0]; 1691 info->nz_allocated = irecv[1]; 1692 info->nz_unneeded = irecv[2]; 1693 info->memory = irecv[3]; 1694 info->mallocs = irecv[4]; 1695 } else if (flag == MAT_GLOBAL_SUM) { 1696 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1697 1698 info->nz_used = irecv[0]; 1699 info->nz_allocated = irecv[1]; 1700 info->nz_unneeded = irecv[2]; 1701 info->memory = irecv[3]; 1702 info->mallocs = irecv[4]; 1703 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1704 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1705 info->fill_ratio_needed = 0; 1706 info->factor_mallocs = 0; 1707 PetscFunctionReturn(0); 1708 } 1709 1710 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg) 1711 { 1712 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1713 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1714 PetscErrorCode ierr; 1715 1716 PetscFunctionBegin; 1717 switch (op) { 1718 case MAT_NEW_NONZERO_LOCATIONS: 1719 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1720 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1721 case MAT_KEEP_NONZERO_PATTERN: 1722 case MAT_SUBMAT_SINGLEIS: 1723 case MAT_NEW_NONZERO_LOCATION_ERR: 1724 MatCheckPreallocated(A,1); 1725 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1726 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1727 break; 1728 case MAT_ROW_ORIENTED: 1729 MatCheckPreallocated(A,1); 1730 a->roworiented = flg; 1731 1732 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1733 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1734 break; 1735 case MAT_NEW_DIAGONALS: 1736 case MAT_SORTED_FULL: 1737 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1738 break; 1739 case MAT_IGNORE_OFF_PROC_ENTRIES: 1740 a->donotstash = flg; 1741 break; 1742 case MAT_USE_HASH_TABLE: 1743 a->ht_flag = flg; 1744 break; 1745 case MAT_HERMITIAN: 1746 MatCheckPreallocated(A,1); 1747 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1748 #if defined(PETSC_USE_COMPLEX) 1749 if (flg) { /* need different mat-vec ops */ 1750 A->ops->mult = MatMult_MPISBAIJ_Hermitian; 1751 A->ops->multadd = MatMultAdd_MPISBAIJ_Hermitian; 1752 A->ops->multtranspose = NULL; 1753 A->ops->multtransposeadd = NULL; 1754 A->symmetric = PETSC_FALSE; 1755 } 1756 #endif 1757 break; 1758 case MAT_SPD: 1759 case MAT_SYMMETRIC: 1760 MatCheckPreallocated(A,1); 1761 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1762 #if defined(PETSC_USE_COMPLEX) 1763 if (flg) { /* restore to use default mat-vec ops */ 1764 A->ops->mult = MatMult_MPISBAIJ; 1765 A->ops->multadd = MatMultAdd_MPISBAIJ; 1766 A->ops->multtranspose = MatMult_MPISBAIJ; 1767 A->ops->multtransposeadd = MatMultAdd_MPISBAIJ; 1768 } 1769 #endif 1770 break; 1771 case MAT_STRUCTURALLY_SYMMETRIC: 1772 MatCheckPreallocated(A,1); 1773 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1774 break; 1775 case MAT_SYMMETRY_ETERNAL: 1776 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric"); 1777 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1778 break; 1779 case MAT_IGNORE_LOWER_TRIANGULAR: 1780 aA->ignore_ltriangular = flg; 1781 break; 1782 case MAT_ERROR_LOWER_TRIANGULAR: 1783 aA->ignore_ltriangular = flg; 1784 break; 1785 case MAT_GETROW_UPPERTRIANGULAR: 1786 aA->getrow_utriangular = flg; 1787 break; 1788 default: 1789 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1790 } 1791 PetscFunctionReturn(0); 1792 } 1793 1794 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B) 1795 { 1796 PetscErrorCode ierr; 1797 1798 PetscFunctionBegin; 1799 if (reuse == MAT_INITIAL_MATRIX) { 1800 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 1801 } else if (reuse == MAT_REUSE_MATRIX) { 1802 ierr = MatCopy(A,*B,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1803 } 1804 PetscFunctionReturn(0); 1805 } 1806 1807 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr) 1808 { 1809 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1810 Mat a = baij->A, b=baij->B; 1811 PetscErrorCode ierr; 1812 PetscInt nv,m,n; 1813 PetscBool flg; 1814 1815 PetscFunctionBegin; 1816 if (ll != rr) { 1817 ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr); 1818 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n"); 1819 } 1820 if (!ll) PetscFunctionReturn(0); 1821 1822 ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr); 1823 if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n); 1824 1825 ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr); 1826 if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size"); 1827 1828 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1829 1830 /* left diagonalscale the off-diagonal part */ 1831 ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr); 1832 1833 /* scale the diagonal part */ 1834 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1835 1836 /* right diagonalscale the off-diagonal part */ 1837 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1838 ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr); 1839 PetscFunctionReturn(0); 1840 } 1841 1842 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A) 1843 { 1844 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1845 PetscErrorCode ierr; 1846 1847 PetscFunctionBegin; 1848 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1849 PetscFunctionReturn(0); 1850 } 1851 1852 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*); 1853 1854 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool *flag) 1855 { 1856 Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data; 1857 Mat a,b,c,d; 1858 PetscBool flg; 1859 PetscErrorCode ierr; 1860 1861 PetscFunctionBegin; 1862 a = matA->A; b = matA->B; 1863 c = matB->A; d = matB->B; 1864 1865 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1866 if (flg) { 1867 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1868 } 1869 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1870 PetscFunctionReturn(0); 1871 } 1872 1873 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str) 1874 { 1875 PetscErrorCode ierr; 1876 PetscBool isbaij; 1877 1878 PetscFunctionBegin; 1879 ierr = PetscObjectTypeCompareAny((PetscObject)B,&isbaij,MATSEQSBAIJ,MATMPISBAIJ,"");CHKERRQ(ierr); 1880 if (!isbaij) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"Not for matrix type %s",((PetscObject)B)->type_name); 1881 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1882 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1883 ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr); 1884 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1885 ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr); 1886 } else { 1887 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1888 Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)B->data; 1889 1890 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1891 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1892 } 1893 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 1894 PetscFunctionReturn(0); 1895 } 1896 1897 PetscErrorCode MatSetUp_MPISBAIJ(Mat A) 1898 { 1899 PetscErrorCode ierr; 1900 1901 PetscFunctionBegin; 1902 ierr = MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1903 PetscFunctionReturn(0); 1904 } 1905 1906 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1907 { 1908 PetscErrorCode ierr; 1909 Mat_MPISBAIJ *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data; 1910 PetscBLASInt bnz,one=1; 1911 Mat_SeqSBAIJ *xa,*ya; 1912 Mat_SeqBAIJ *xb,*yb; 1913 1914 PetscFunctionBegin; 1915 if (str == SAME_NONZERO_PATTERN) { 1916 PetscScalar alpha = a; 1917 xa = (Mat_SeqSBAIJ*)xx->A->data; 1918 ya = (Mat_SeqSBAIJ*)yy->A->data; 1919 ierr = PetscBLASIntCast(xa->nz,&bnz);CHKERRQ(ierr); 1920 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one)); 1921 xb = (Mat_SeqBAIJ*)xx->B->data; 1922 yb = (Mat_SeqBAIJ*)yy->B->data; 1923 ierr = PetscBLASIntCast(xb->nz,&bnz);CHKERRQ(ierr); 1924 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one)); 1925 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 1926 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1927 ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 1928 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1929 ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr); 1930 } else { 1931 Mat B; 1932 PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs; 1933 if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size"); 1934 ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr); 1935 ierr = MatGetRowUpperTriangular(Y);CHKERRQ(ierr); 1936 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 1937 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 1938 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 1939 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 1940 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 1941 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 1942 ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr); 1943 ierr = MatAXPYGetPreallocation_SeqSBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 1944 ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 1945 ierr = MatMPISBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 1946 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 1947 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 1948 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 1949 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 1950 ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr); 1951 ierr = MatRestoreRowUpperTriangular(Y);CHKERRQ(ierr); 1952 } 1953 PetscFunctionReturn(0); 1954 } 1955 1956 PetscErrorCode MatCreateSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[]) 1957 { 1958 PetscErrorCode ierr; 1959 PetscInt i; 1960 PetscBool flg; 1961 1962 PetscFunctionBegin; 1963 ierr = MatCreateSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr); /* B[] are sbaij matrices */ 1964 for (i=0; i<n; i++) { 1965 ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr); 1966 if (!flg) { 1967 ierr = MatSeqSBAIJZeroOps_Private(*B[i]);CHKERRQ(ierr); 1968 } 1969 } 1970 PetscFunctionReturn(0); 1971 } 1972 1973 PetscErrorCode MatShift_MPISBAIJ(Mat Y,PetscScalar a) 1974 { 1975 PetscErrorCode ierr; 1976 Mat_MPISBAIJ *maij = (Mat_MPISBAIJ*)Y->data; 1977 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ*)maij->A->data; 1978 1979 PetscFunctionBegin; 1980 if (!Y->preallocated) { 1981 ierr = MatMPISBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);CHKERRQ(ierr); 1982 } else if (!aij->nz) { 1983 PetscInt nonew = aij->nonew; 1984 ierr = MatSeqSBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL);CHKERRQ(ierr); 1985 aij->nonew = nonew; 1986 } 1987 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 1988 PetscFunctionReturn(0); 1989 } 1990 1991 PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A,PetscBool *missing,PetscInt *d) 1992 { 1993 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1994 PetscErrorCode ierr; 1995 1996 PetscFunctionBegin; 1997 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 1998 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 1999 if (d) { 2000 PetscInt rstart; 2001 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 2002 *d += rstart/A->rmap->bs; 2003 2004 } 2005 PetscFunctionReturn(0); 2006 } 2007 2008 PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a) 2009 { 2010 PetscFunctionBegin; 2011 *a = ((Mat_MPISBAIJ*)A->data)->A; 2012 PetscFunctionReturn(0); 2013 } 2014 2015 /* -------------------------------------------------------------------*/ 2016 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ, 2017 MatGetRow_MPISBAIJ, 2018 MatRestoreRow_MPISBAIJ, 2019 MatMult_MPISBAIJ, 2020 /* 4*/ MatMultAdd_MPISBAIJ, 2021 MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */ 2022 MatMultAdd_MPISBAIJ, 2023 0, 2024 0, 2025 0, 2026 /* 10*/ 0, 2027 0, 2028 0, 2029 MatSOR_MPISBAIJ, 2030 MatTranspose_MPISBAIJ, 2031 /* 15*/ MatGetInfo_MPISBAIJ, 2032 MatEqual_MPISBAIJ, 2033 MatGetDiagonal_MPISBAIJ, 2034 MatDiagonalScale_MPISBAIJ, 2035 MatNorm_MPISBAIJ, 2036 /* 20*/ MatAssemblyBegin_MPISBAIJ, 2037 MatAssemblyEnd_MPISBAIJ, 2038 MatSetOption_MPISBAIJ, 2039 MatZeroEntries_MPISBAIJ, 2040 /* 24*/ 0, 2041 0, 2042 0, 2043 0, 2044 0, 2045 /* 29*/ MatSetUp_MPISBAIJ, 2046 0, 2047 0, 2048 MatGetDiagonalBlock_MPISBAIJ, 2049 0, 2050 /* 34*/ MatDuplicate_MPISBAIJ, 2051 0, 2052 0, 2053 0, 2054 0, 2055 /* 39*/ MatAXPY_MPISBAIJ, 2056 MatCreateSubMatrices_MPISBAIJ, 2057 MatIncreaseOverlap_MPISBAIJ, 2058 MatGetValues_MPISBAIJ, 2059 MatCopy_MPISBAIJ, 2060 /* 44*/ 0, 2061 MatScale_MPISBAIJ, 2062 MatShift_MPISBAIJ, 2063 0, 2064 0, 2065 /* 49*/ 0, 2066 0, 2067 0, 2068 0, 2069 0, 2070 /* 54*/ 0, 2071 0, 2072 MatSetUnfactored_MPISBAIJ, 2073 0, 2074 MatSetValuesBlocked_MPISBAIJ, 2075 /* 59*/ MatCreateSubMatrix_MPISBAIJ, 2076 0, 2077 0, 2078 0, 2079 0, 2080 /* 64*/ 0, 2081 0, 2082 0, 2083 0, 2084 0, 2085 /* 69*/ MatGetRowMaxAbs_MPISBAIJ, 2086 0, 2087 MatConvert_MPISBAIJ_Basic, 2088 0, 2089 0, 2090 /* 74*/ 0, 2091 0, 2092 0, 2093 0, 2094 0, 2095 /* 79*/ 0, 2096 0, 2097 0, 2098 0, 2099 MatLoad_MPISBAIJ, 2100 /* 84*/ 0, 2101 0, 2102 0, 2103 0, 2104 0, 2105 /* 89*/ 0, 2106 0, 2107 0, 2108 0, 2109 0, 2110 /* 94*/ 0, 2111 0, 2112 0, 2113 0, 2114 0, 2115 /* 99*/ 0, 2116 0, 2117 0, 2118 0, 2119 0, 2120 /*104*/ 0, 2121 MatRealPart_MPISBAIJ, 2122 MatImaginaryPart_MPISBAIJ, 2123 MatGetRowUpperTriangular_MPISBAIJ, 2124 MatRestoreRowUpperTriangular_MPISBAIJ, 2125 /*109*/ 0, 2126 0, 2127 0, 2128 0, 2129 MatMissingDiagonal_MPISBAIJ, 2130 /*114*/ 0, 2131 0, 2132 0, 2133 0, 2134 0, 2135 /*119*/ 0, 2136 0, 2137 0, 2138 0, 2139 0, 2140 /*124*/ 0, 2141 0, 2142 0, 2143 0, 2144 0, 2145 /*129*/ 0, 2146 0, 2147 0, 2148 0, 2149 0, 2150 /*134*/ 0, 2151 0, 2152 0, 2153 0, 2154 0, 2155 /*139*/ MatSetBlockSizes_Default, 2156 0, 2157 0, 2158 0, 2159 0, 2160 /*144*/MatCreateMPIMatConcatenateSeqMat_MPISBAIJ 2161 }; 2162 2163 PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz) 2164 { 2165 Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)B->data; 2166 PetscErrorCode ierr; 2167 PetscInt i,mbs,Mbs; 2168 PetscMPIInt size; 2169 2170 PetscFunctionBegin; 2171 ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 2172 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2173 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2174 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2175 if (B->rmap->N > B->cmap->N) SETERRQ2(PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"MPISBAIJ matrix cannot have more rows %D than columns %D",B->rmap->N,B->cmap->N); 2176 if (B->rmap->n > B->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"MPISBAIJ matrix cannot have more local rows %D than columns %D",B->rmap->n,B->cmap->n); 2177 2178 mbs = B->rmap->n/bs; 2179 Mbs = B->rmap->N/bs; 2180 if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->N,bs); 2181 2182 B->rmap->bs = bs; 2183 b->bs2 = bs*bs; 2184 b->mbs = mbs; 2185 b->Mbs = Mbs; 2186 b->nbs = B->cmap->n/bs; 2187 b->Nbs = B->cmap->N/bs; 2188 2189 for (i=0; i<=b->size; i++) { 2190 b->rangebs[i] = B->rmap->range[i]/bs; 2191 } 2192 b->rstartbs = B->rmap->rstart/bs; 2193 b->rendbs = B->rmap->rend/bs; 2194 2195 b->cstartbs = B->cmap->rstart/bs; 2196 b->cendbs = B->cmap->rend/bs; 2197 2198 #if defined(PETSC_USE_CTABLE) 2199 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2200 #else 2201 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2202 #endif 2203 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2204 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2205 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2206 ierr = VecDestroy(&b->slvec0);CHKERRQ(ierr); 2207 ierr = VecDestroy(&b->slvec0b);CHKERRQ(ierr); 2208 ierr = VecDestroy(&b->slvec1);CHKERRQ(ierr); 2209 ierr = VecDestroy(&b->slvec1a);CHKERRQ(ierr); 2210 ierr = VecDestroy(&b->slvec1b);CHKERRQ(ierr); 2211 ierr = VecScatterDestroy(&b->sMvctx);CHKERRQ(ierr); 2212 2213 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2214 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 2215 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2216 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2217 ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr); 2218 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 2219 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2220 2221 if (!B->preallocated) { 2222 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2223 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2224 ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr); 2225 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2226 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr); 2227 } 2228 2229 ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2230 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 2231 2232 B->preallocated = PETSC_TRUE; 2233 B->was_assembled = PETSC_FALSE; 2234 B->assembled = PETSC_FALSE; 2235 PetscFunctionReturn(0); 2236 } 2237 2238 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2239 { 2240 PetscInt m,rstart,cend; 2241 PetscInt i,j,d,nz,bd, nz_max=0,*d_nnz=0,*o_nnz=0; 2242 const PetscInt *JJ =0; 2243 PetscScalar *values=0; 2244 PetscBool roworiented = ((Mat_MPISBAIJ*)B->data)->roworiented; 2245 PetscErrorCode ierr; 2246 PetscBool nooffprocentries; 2247 2248 PetscFunctionBegin; 2249 if (bs < 1) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 2250 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2251 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2252 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2253 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2254 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2255 m = B->rmap->n/bs; 2256 rstart = B->rmap->rstart/bs; 2257 cend = B->cmap->rend/bs; 2258 2259 if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 2260 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 2261 for (i=0; i<m; i++) { 2262 nz = ii[i+1] - ii[i]; 2263 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 2264 /* count the ones on the diagonal and above, split into diagonal and off diagonal portions. */ 2265 JJ = jj + ii[i]; 2266 bd = 0; 2267 for (j=0; j<nz; j++) { 2268 if (*JJ >= i + rstart) break; 2269 JJ++; 2270 bd++; 2271 } 2272 d = 0; 2273 for (; j<nz; j++) { 2274 if (*JJ++ >= cend) break; 2275 d++; 2276 } 2277 d_nnz[i] = d; 2278 o_nnz[i] = nz - d - bd; 2279 nz = nz - bd; 2280 nz_max = PetscMax(nz_max,nz); 2281 } 2282 ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2283 ierr = MatSetOption(B,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 2284 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 2285 2286 values = (PetscScalar*)V; 2287 if (!values) { 2288 ierr = PetscCalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr); 2289 } 2290 for (i=0; i<m; i++) { 2291 PetscInt row = i + rstart; 2292 PetscInt ncols = ii[i+1] - ii[i]; 2293 const PetscInt *icols = jj + ii[i]; 2294 if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */ 2295 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2296 ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2297 } else { /* block ordering does not match so we can only insert one block at a time. */ 2298 PetscInt j; 2299 for (j=0; j<ncols; j++) { 2300 const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0); 2301 ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr); 2302 } 2303 } 2304 } 2305 2306 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2307 nooffprocentries = B->nooffprocentries; 2308 B->nooffprocentries = PETSC_TRUE; 2309 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2310 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2311 B->nooffprocentries = nooffprocentries; 2312 2313 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2314 PetscFunctionReturn(0); 2315 } 2316 2317 /*MC 2318 MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices, 2319 based on block compressed sparse row format. Only the upper triangular portion of the "diagonal" portion of 2320 the matrix is stored. 2321 2322 For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you 2323 can call MatSetOption(Mat, MAT_HERMITIAN); 2324 2325 Options Database Keys: 2326 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions() 2327 2328 Notes: 2329 The number of rows in the matrix must be less than or equal to the number of columns. Similarly the number of rows in the 2330 diagonal portion of the matrix of each process has to less than or equal the number of columns. 2331 2332 Level: beginner 2333 2334 .seealso: MatCreateMPISBAIJ(), MATSEQSBAIJ, MatType 2335 M*/ 2336 2337 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B) 2338 { 2339 Mat_MPISBAIJ *b; 2340 PetscErrorCode ierr; 2341 PetscBool flg = PETSC_FALSE; 2342 2343 PetscFunctionBegin; 2344 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 2345 B->data = (void*)b; 2346 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2347 2348 B->ops->destroy = MatDestroy_MPISBAIJ; 2349 B->ops->view = MatView_MPISBAIJ; 2350 B->assembled = PETSC_FALSE; 2351 B->insertmode = NOT_SET_VALUES; 2352 2353 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 2354 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr); 2355 2356 /* build local table of row and column ownerships */ 2357 ierr = PetscMalloc1(b->size+2,&b->rangebs);CHKERRQ(ierr); 2358 2359 /* build cache for off array entries formed */ 2360 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 2361 2362 b->donotstash = PETSC_FALSE; 2363 b->colmap = NULL; 2364 b->garray = NULL; 2365 b->roworiented = PETSC_TRUE; 2366 2367 /* stuff used in block assembly */ 2368 b->barray = 0; 2369 2370 /* stuff used for matrix vector multiply */ 2371 b->lvec = 0; 2372 b->Mvctx = 0; 2373 b->slvec0 = 0; 2374 b->slvec0b = 0; 2375 b->slvec1 = 0; 2376 b->slvec1a = 0; 2377 b->slvec1b = 0; 2378 b->sMvctx = 0; 2379 2380 /* stuff for MatGetRow() */ 2381 b->rowindices = 0; 2382 b->rowvalues = 0; 2383 b->getrowactive = PETSC_FALSE; 2384 2385 /* hash table stuff */ 2386 b->ht = 0; 2387 b->hd = 0; 2388 b->ht_size = 0; 2389 b->ht_flag = PETSC_FALSE; 2390 b->ht_fact = 0; 2391 b->ht_total_ct = 0; 2392 b->ht_insert_ct = 0; 2393 2394 /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */ 2395 b->ijonly = PETSC_FALSE; 2396 2397 b->in_loc = 0; 2398 b->v_loc = 0; 2399 b->n_loc = 0; 2400 2401 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ);CHKERRQ(ierr); 2402 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr); 2403 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr); 2404 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr); 2405 #if defined(PETSC_HAVE_ELEMENTAL) 2406 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_elemental_C",MatConvert_MPISBAIJ_Elemental);CHKERRQ(ierr); 2407 #endif 2408 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpiaij_C",MatConvert_MPISBAIJ_Basic);CHKERRQ(ierr); 2409 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpibaij_C",MatConvert_MPISBAIJ_Basic);CHKERRQ(ierr); 2410 2411 B->symmetric = PETSC_TRUE; 2412 B->structurally_symmetric = PETSC_TRUE; 2413 B->symmetric_set = PETSC_TRUE; 2414 B->structurally_symmetric_set = PETSC_TRUE; 2415 B->symmetric_eternal = PETSC_TRUE; 2416 #if defined(PETSC_USE_COMPLEX) 2417 B->hermitian = PETSC_FALSE; 2418 B->hermitian_set = PETSC_FALSE; 2419 #else 2420 B->hermitian = PETSC_TRUE; 2421 B->hermitian_set = PETSC_TRUE; 2422 #endif 2423 2424 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr); 2425 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr); 2426 ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",flg,&flg,NULL);CHKERRQ(ierr); 2427 if (flg) { 2428 PetscReal fact = 1.39; 2429 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 2430 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr); 2431 if (fact <= 1.0) fact = 1.39; 2432 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 2433 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 2434 } 2435 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2436 PetscFunctionReturn(0); 2437 } 2438 2439 /*MC 2440 MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices. 2441 2442 This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator, 2443 and MATMPISBAIJ otherwise. 2444 2445 Options Database Keys: 2446 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions() 2447 2448 Level: beginner 2449 2450 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ 2451 M*/ 2452 2453 /*@C 2454 MatMPISBAIJSetPreallocation - For good matrix assembly performance 2455 the user should preallocate the matrix storage by setting the parameters 2456 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2457 performance can be increased by more than a factor of 50. 2458 2459 Collective on Mat 2460 2461 Input Parameters: 2462 + B - the matrix 2463 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 2464 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 2465 . d_nz - number of block nonzeros per block row in diagonal portion of local 2466 submatrix (same for all local rows) 2467 . d_nnz - array containing the number of block nonzeros in the various block rows 2468 in the upper triangular and diagonal part of the in diagonal portion of the local 2469 (possibly different for each block row) or NULL. If you plan to factor the matrix you must leave room 2470 for the diagonal entry and set a value even if it is zero. 2471 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2472 submatrix (same for all local rows). 2473 - o_nnz - array containing the number of nonzeros in the various block rows of the 2474 off-diagonal portion of the local submatrix that is right of the diagonal 2475 (possibly different for each block row) or NULL. 2476 2477 2478 Options Database Keys: 2479 + -mat_no_unroll - uses code that does not unroll the loops in the 2480 block calculations (much slower) 2481 - -mat_block_size - size of the blocks to use 2482 2483 Notes: 2484 2485 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2486 than it must be used on all processors that share the object for that argument. 2487 2488 If the *_nnz parameter is given then the *_nz parameter is ignored 2489 2490 Storage Information: 2491 For a square global matrix we define each processor's diagonal portion 2492 to be its local rows and the corresponding columns (a square submatrix); 2493 each processor's off-diagonal portion encompasses the remainder of the 2494 local matrix (a rectangular submatrix). 2495 2496 The user can specify preallocated storage for the diagonal part of 2497 the local submatrix with either d_nz or d_nnz (not both). Set 2498 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 2499 memory allocation. Likewise, specify preallocated storage for the 2500 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2501 2502 You can call MatGetInfo() to get information on how effective the preallocation was; 2503 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 2504 You can also run with the option -info and look for messages with the string 2505 malloc in them to see if additional memory allocation was needed. 2506 2507 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2508 the figure below we depict these three local rows and all columns (0-11). 2509 2510 .vb 2511 0 1 2 3 4 5 6 7 8 9 10 11 2512 -------------------------- 2513 row 3 |. . . d d d o o o o o o 2514 row 4 |. . . d d d o o o o o o 2515 row 5 |. . . d d d o o o o o o 2516 -------------------------- 2517 .ve 2518 2519 Thus, any entries in the d locations are stored in the d (diagonal) 2520 submatrix, and any entries in the o locations are stored in the 2521 o (off-diagonal) submatrix. Note that the d matrix is stored in 2522 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 2523 2524 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 2525 plus the diagonal part of the d matrix, 2526 and o_nz should indicate the number of block nonzeros per row in the o matrix 2527 2528 In general, for PDE problems in which most nonzeros are near the diagonal, 2529 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2530 or you will get TERRIBLE performance; see the users' manual chapter on 2531 matrices. 2532 2533 Level: intermediate 2534 2535 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership() 2536 @*/ 2537 PetscErrorCode MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2538 { 2539 PetscErrorCode ierr; 2540 2541 PetscFunctionBegin; 2542 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2543 PetscValidType(B,1); 2544 PetscValidLogicalCollectiveInt(B,bs,2); 2545 ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 2546 PetscFunctionReturn(0); 2547 } 2548 2549 /*@C 2550 MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format 2551 (block compressed row). For good matrix assembly performance 2552 the user should preallocate the matrix storage by setting the parameters 2553 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2554 performance can be increased by more than a factor of 50. 2555 2556 Collective 2557 2558 Input Parameters: 2559 + comm - MPI communicator 2560 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 2561 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 2562 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2563 This value should be the same as the local size used in creating the 2564 y vector for the matrix-vector product y = Ax. 2565 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 2566 This value should be the same as the local size used in creating the 2567 x vector for the matrix-vector product y = Ax. 2568 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2569 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2570 . d_nz - number of block nonzeros per block row in diagonal portion of local 2571 submatrix (same for all local rows) 2572 . d_nnz - array containing the number of block nonzeros in the various block rows 2573 in the upper triangular portion of the in diagonal portion of the local 2574 (possibly different for each block block row) or NULL. 2575 If you plan to factor the matrix you must leave room for the diagonal entry and 2576 set its value even if it is zero. 2577 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2578 submatrix (same for all local rows). 2579 - o_nnz - array containing the number of nonzeros in the various block rows of the 2580 off-diagonal portion of the local submatrix (possibly different for 2581 each block row) or NULL. 2582 2583 Output Parameter: 2584 . A - the matrix 2585 2586 Options Database Keys: 2587 + -mat_no_unroll - uses code that does not unroll the loops in the 2588 block calculations (much slower) 2589 . -mat_block_size - size of the blocks to use 2590 - -mat_mpi - use the parallel matrix data structures even on one processor 2591 (defaults to using SeqBAIJ format on one processor) 2592 2593 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 2594 MatXXXXSetPreallocation() paradigm instead of this routine directly. 2595 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 2596 2597 Notes: 2598 The number of rows and columns must be divisible by blocksize. 2599 This matrix type does not support complex Hermitian operation. 2600 2601 The user MUST specify either the local or global matrix dimensions 2602 (possibly both). 2603 2604 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2605 than it must be used on all processors that share the object for that argument. 2606 2607 If the *_nnz parameter is given then the *_nz parameter is ignored 2608 2609 Storage Information: 2610 For a square global matrix we define each processor's diagonal portion 2611 to be its local rows and the corresponding columns (a square submatrix); 2612 each processor's off-diagonal portion encompasses the remainder of the 2613 local matrix (a rectangular submatrix). 2614 2615 The user can specify preallocated storage for the diagonal part of 2616 the local submatrix with either d_nz or d_nnz (not both). Set 2617 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 2618 memory allocation. Likewise, specify preallocated storage for the 2619 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2620 2621 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2622 the figure below we depict these three local rows and all columns (0-11). 2623 2624 .vb 2625 0 1 2 3 4 5 6 7 8 9 10 11 2626 -------------------------- 2627 row 3 |. . . d d d o o o o o o 2628 row 4 |. . . d d d o o o o o o 2629 row 5 |. . . d d d o o o o o o 2630 -------------------------- 2631 .ve 2632 2633 Thus, any entries in the d locations are stored in the d (diagonal) 2634 submatrix, and any entries in the o locations are stored in the 2635 o (off-diagonal) submatrix. Note that the d matrix is stored in 2636 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 2637 2638 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 2639 plus the diagonal part of the d matrix, 2640 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2641 In general, for PDE problems in which most nonzeros are near the diagonal, 2642 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2643 or you will get TERRIBLE performance; see the users' manual chapter on 2644 matrices. 2645 2646 Level: intermediate 2647 2648 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ() 2649 @*/ 2650 2651 PetscErrorCode MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2652 { 2653 PetscErrorCode ierr; 2654 PetscMPIInt size; 2655 2656 PetscFunctionBegin; 2657 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2658 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2659 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2660 if (size > 1) { 2661 ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr); 2662 ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2663 } else { 2664 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 2665 ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2666 } 2667 PetscFunctionReturn(0); 2668 } 2669 2670 2671 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2672 { 2673 Mat mat; 2674 Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data; 2675 PetscErrorCode ierr; 2676 PetscInt len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs; 2677 PetscScalar *array; 2678 2679 PetscFunctionBegin; 2680 *newmat = 0; 2681 2682 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2683 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2684 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2685 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2686 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2687 2688 mat->factortype = matin->factortype; 2689 mat->preallocated = PETSC_TRUE; 2690 mat->assembled = PETSC_TRUE; 2691 mat->insertmode = NOT_SET_VALUES; 2692 2693 a = (Mat_MPISBAIJ*)mat->data; 2694 a->bs2 = oldmat->bs2; 2695 a->mbs = oldmat->mbs; 2696 a->nbs = oldmat->nbs; 2697 a->Mbs = oldmat->Mbs; 2698 a->Nbs = oldmat->Nbs; 2699 2700 a->size = oldmat->size; 2701 a->rank = oldmat->rank; 2702 a->donotstash = oldmat->donotstash; 2703 a->roworiented = oldmat->roworiented; 2704 a->rowindices = 0; 2705 a->rowvalues = 0; 2706 a->getrowactive = PETSC_FALSE; 2707 a->barray = 0; 2708 a->rstartbs = oldmat->rstartbs; 2709 a->rendbs = oldmat->rendbs; 2710 a->cstartbs = oldmat->cstartbs; 2711 a->cendbs = oldmat->cendbs; 2712 2713 /* hash table stuff */ 2714 a->ht = 0; 2715 a->hd = 0; 2716 a->ht_size = 0; 2717 a->ht_flag = oldmat->ht_flag; 2718 a->ht_fact = oldmat->ht_fact; 2719 a->ht_total_ct = 0; 2720 a->ht_insert_ct = 0; 2721 2722 ierr = PetscArraycpy(a->rangebs,oldmat->rangebs,a->size+2);CHKERRQ(ierr); 2723 if (oldmat->colmap) { 2724 #if defined(PETSC_USE_CTABLE) 2725 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2726 #else 2727 ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr); 2728 ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2729 ierr = PetscArraycpy(a->colmap,oldmat->colmap,a->Nbs);CHKERRQ(ierr); 2730 #endif 2731 } else a->colmap = 0; 2732 2733 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2734 ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr); 2735 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2736 ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); 2737 } else a->garray = 0; 2738 2739 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 2740 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2741 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2742 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2743 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2744 2745 ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr); 2746 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr); 2747 ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr); 2748 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr); 2749 2750 ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr); 2751 ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr); 2752 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr); 2753 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr); 2754 ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr); 2755 ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr); 2756 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr); 2757 ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr); 2758 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr); 2759 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr); 2760 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b);CHKERRQ(ierr); 2761 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a);CHKERRQ(ierr); 2762 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b);CHKERRQ(ierr); 2763 2764 /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */ 2765 ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr); 2766 a->sMvctx = oldmat->sMvctx; 2767 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx);CHKERRQ(ierr); 2768 2769 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2770 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2771 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2772 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2773 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2774 *newmat = mat; 2775 PetscFunctionReturn(0); 2776 } 2777 2778 PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer) 2779 { 2780 PetscErrorCode ierr; 2781 PetscInt i,nz,j,rstart,rend; 2782 PetscScalar *vals,*buf; 2783 MPI_Comm comm; 2784 MPI_Status status; 2785 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,mmbs; 2786 PetscInt header[4],*rowlengths = 0,M,N,m,*cols,*locrowlens; 2787 PetscInt *procsnz = 0,jj,*mycols,*ibuf; 2788 PetscInt bs = newmat->rmap->bs,Mbs,mbs,extra_rows; 2789 PetscInt *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2790 PetscInt dcount,kmax,k,nzcount,tmp; 2791 int fd; 2792 PetscBool isbinary; 2793 2794 PetscFunctionBegin; 2795 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 2796 if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)newmat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newmat)->type_name); 2797 2798 /* force binary viewer to load .info file if it has not yet done so */ 2799 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2800 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 2801 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr); 2802 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 2803 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2804 if (bs < 0) bs = 1; 2805 2806 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2807 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2808 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2809 if (!rank) { 2810 ierr = PetscBinaryRead(fd,(char*)header,4,NULL,PETSC_INT);CHKERRQ(ierr); 2811 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2812 if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newmat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ"); 2813 } 2814 2815 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2816 M = header[1]; 2817 N = header[2]; 2818 2819 /* If global sizes are set, check if they are consistent with that given in the file */ 2820 if (newmat->rmap->N >= 0 && newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newmat->rmap->N,M); 2821 if (newmat->cmap->N >= 0 && newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newmat->cmap->N,N); 2822 2823 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 2824 2825 /* 2826 This code adds extra rows to make sure the number of rows is 2827 divisible by the blocksize 2828 */ 2829 Mbs = M/bs; 2830 extra_rows = bs - M + bs*(Mbs); 2831 if (extra_rows == bs) extra_rows = 0; 2832 else Mbs++; 2833 if (extra_rows &&!rank) { 2834 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 2835 } 2836 2837 /* determine ownership of all rows */ 2838 if (newmat->rmap->n < 0) { /* PETSC_DECIDE */ 2839 mbs = Mbs/size + ((Mbs % size) > rank); 2840 m = mbs*bs; 2841 } else { /* User Set */ 2842 m = newmat->rmap->n; 2843 mbs = m/bs; 2844 } 2845 ierr = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr); 2846 ierr = PetscMPIIntCast(mbs,&mmbs);CHKERRQ(ierr); 2847 ierr = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2848 rowners[0] = 0; 2849 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2850 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2851 rstart = rowners[rank]; 2852 rend = rowners[rank+1]; 2853 2854 /* distribute row lengths to all processors */ 2855 ierr = PetscMalloc1((rend-rstart)*bs,&locrowlens);CHKERRQ(ierr); 2856 if (!rank) { 2857 ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr); 2858 ierr = PetscBinaryRead(fd,rowlengths,M,NULL,PETSC_INT);CHKERRQ(ierr); 2859 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2860 ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr); 2861 for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2862 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2863 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2864 } else { 2865 ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2866 } 2867 2868 if (!rank) { /* procs[0] */ 2869 /* calculate the number of nonzeros on each processor */ 2870 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 2871 for (i=0; i<size; i++) { 2872 for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 2873 procsnz[i] += rowlengths[j]; 2874 } 2875 } 2876 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2877 2878 /* determine max buffer needed and allocate it */ 2879 maxnz = 0; 2880 for (i=0; i<size; i++) { 2881 maxnz = PetscMax(maxnz,procsnz[i]); 2882 } 2883 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 2884 2885 /* read in my part of the matrix column indices */ 2886 nz = procsnz[0]; 2887 ierr = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr); 2888 mycols = ibuf; 2889 if (size == 1) nz -= extra_rows; 2890 ierr = PetscBinaryRead(fd,mycols,nz,NULL,PETSC_INT);CHKERRQ(ierr); 2891 if (size == 1) { 2892 for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i; 2893 } 2894 2895 /* read in every ones (except the last) and ship off */ 2896 for (i=1; i<size-1; i++) { 2897 nz = procsnz[i]; 2898 ierr = PetscBinaryRead(fd,cols,nz,NULL,PETSC_INT);CHKERRQ(ierr); 2899 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2900 } 2901 /* read in the stuff for the last proc */ 2902 if (size != 1) { 2903 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2904 ierr = PetscBinaryRead(fd,cols,nz,NULL,PETSC_INT);CHKERRQ(ierr); 2905 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2906 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 2907 } 2908 ierr = PetscFree(cols);CHKERRQ(ierr); 2909 } else { /* procs[i], i>0 */ 2910 /* determine buffer space needed for message */ 2911 nz = 0; 2912 for (i=0; i<m; i++) nz += locrowlens[i]; 2913 ierr = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr); 2914 mycols = ibuf; 2915 /* receive message of column indices*/ 2916 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2917 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2918 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2919 } 2920 2921 /* loop over local rows, determining number of off diagonal entries */ 2922 ierr = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr); 2923 ierr = PetscCalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr); 2924 rowcount = 0; 2925 nzcount = 0; 2926 for (i=0; i<mbs; i++) { 2927 dcount = 0; 2928 odcount = 0; 2929 for (j=0; j<bs; j++) { 2930 kmax = locrowlens[rowcount]; 2931 for (k=0; k<kmax; k++) { 2932 tmp = mycols[nzcount++]/bs; /* block col. index */ 2933 if (!mask[tmp]) { 2934 mask[tmp] = 1; 2935 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */ 2936 else masked1[dcount++] = tmp; /* entry in diag portion */ 2937 } 2938 } 2939 rowcount++; 2940 } 2941 2942 dlens[i] = dcount; /* d_nzz[i] */ 2943 odlens[i] = odcount; /* o_nzz[i] */ 2944 2945 /* zero out the mask elements we set */ 2946 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2947 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2948 } 2949 ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 2950 ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr); 2951 ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 2952 2953 if (!rank) { 2954 ierr = PetscMalloc1(maxnz,&buf);CHKERRQ(ierr); 2955 /* read in my part of the matrix numerical values */ 2956 nz = procsnz[0]; 2957 vals = buf; 2958 mycols = ibuf; 2959 if (size == 1) nz -= extra_rows; 2960 ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr); 2961 if (size == 1) { 2962 for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0; 2963 } 2964 2965 /* insert into matrix */ 2966 jj = rstart*bs; 2967 for (i=0; i<m; i++) { 2968 ierr = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2969 mycols += locrowlens[i]; 2970 vals += locrowlens[i]; 2971 jj++; 2972 } 2973 2974 /* read in other processors (except the last one) and ship out */ 2975 for (i=1; i<size-1; i++) { 2976 nz = procsnz[i]; 2977 vals = buf; 2978 ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr); 2979 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 2980 } 2981 /* the last proc */ 2982 if (size != 1) { 2983 nz = procsnz[i] - extra_rows; 2984 vals = buf; 2985 ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr); 2986 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2987 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 2988 } 2989 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2990 2991 } else { 2992 /* receive numeric values */ 2993 ierr = PetscMalloc1(nz,&buf);CHKERRQ(ierr); 2994 2995 /* receive message of values*/ 2996 vals = buf; 2997 mycols = ibuf; 2998 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr); 2999 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 3000 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 3001 3002 /* insert into matrix */ 3003 jj = rstart*bs; 3004 for (i=0; i<m; i++) { 3005 ierr = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 3006 mycols += locrowlens[i]; 3007 vals += locrowlens[i]; 3008 jj++; 3009 } 3010 } 3011 3012 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 3013 ierr = PetscFree(buf);CHKERRQ(ierr); 3014 ierr = PetscFree(ibuf);CHKERRQ(ierr); 3015 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 3016 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 3017 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 3018 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3019 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3020 PetscFunctionReturn(0); 3021 } 3022 3023 /*XXXXX@ 3024 MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 3025 3026 Input Parameters: 3027 . mat - the matrix 3028 . fact - factor 3029 3030 Not Collective on Mat, each process can have a different hash factor 3031 3032 Level: advanced 3033 3034 Notes: 3035 This can also be set by the command line option: -mat_use_hash_table fact 3036 3037 .seealso: MatSetOption() 3038 @XXXXX*/ 3039 3040 3041 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[]) 3042 { 3043 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 3044 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(a->B)->data; 3045 PetscReal atmp; 3046 PetscReal *work,*svalues,*rvalues; 3047 PetscErrorCode ierr; 3048 PetscInt i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol; 3049 PetscMPIInt rank,size; 3050 PetscInt *rowners_bs,dest,count,source; 3051 PetscScalar *va; 3052 MatScalar *ba; 3053 MPI_Status stat; 3054 3055 PetscFunctionBegin; 3056 if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov"); 3057 ierr = MatGetRowMaxAbs(a->A,v,NULL);CHKERRQ(ierr); 3058 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 3059 3060 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 3061 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 3062 3063 bs = A->rmap->bs; 3064 mbs = a->mbs; 3065 Mbs = a->Mbs; 3066 ba = b->a; 3067 bi = b->i; 3068 bj = b->j; 3069 3070 /* find ownerships */ 3071 rowners_bs = A->rmap->range; 3072 3073 /* each proc creates an array to be distributed */ 3074 ierr = PetscCalloc1(bs*Mbs,&work);CHKERRQ(ierr); 3075 3076 /* row_max for B */ 3077 if (rank != size-1) { 3078 for (i=0; i<mbs; i++) { 3079 ncols = bi[1] - bi[0]; bi++; 3080 brow = bs*i; 3081 for (j=0; j<ncols; j++) { 3082 bcol = bs*(*bj); 3083 for (kcol=0; kcol<bs; kcol++) { 3084 col = bcol + kcol; /* local col index */ 3085 col += rowners_bs[rank+1]; /* global col index */ 3086 for (krow=0; krow<bs; krow++) { 3087 atmp = PetscAbsScalar(*ba); ba++; 3088 row = brow + krow; /* local row index */ 3089 if (PetscRealPart(va[row]) < atmp) va[row] = atmp; 3090 if (work[col] < atmp) work[col] = atmp; 3091 } 3092 } 3093 bj++; 3094 } 3095 } 3096 3097 /* send values to its owners */ 3098 for (dest=rank+1; dest<size; dest++) { 3099 svalues = work + rowners_bs[dest]; 3100 count = rowners_bs[dest+1]-rowners_bs[dest]; 3101 ierr = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 3102 } 3103 } 3104 3105 /* receive values */ 3106 if (rank) { 3107 rvalues = work; 3108 count = rowners_bs[rank+1]-rowners_bs[rank]; 3109 for (source=0; source<rank; source++) { 3110 ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat);CHKERRQ(ierr); 3111 /* process values */ 3112 for (i=0; i<count; i++) { 3113 if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i]; 3114 } 3115 } 3116 } 3117 3118 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 3119 ierr = PetscFree(work);CHKERRQ(ierr); 3120 PetscFunctionReturn(0); 3121 } 3122 3123 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 3124 { 3125 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 3126 PetscErrorCode ierr; 3127 PetscInt mbs=mat->mbs,bs=matin->rmap->bs; 3128 PetscScalar *x,*ptr,*from; 3129 Vec bb1; 3130 const PetscScalar *b; 3131 3132 PetscFunctionBegin; 3133 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 3134 if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 3135 3136 if (flag == SOR_APPLY_UPPER) { 3137 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 3138 PetscFunctionReturn(0); 3139 } 3140 3141 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 3142 if (flag & SOR_ZERO_INITIAL_GUESS) { 3143 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 3144 its--; 3145 } 3146 3147 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 3148 while (its--) { 3149 3150 /* lower triangular part: slvec0b = - B^T*xx */ 3151 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 3152 3153 /* copy xx into slvec0a */ 3154 ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr); 3155 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 3156 ierr = PetscArraycpy(ptr,x,bs*mbs);CHKERRQ(ierr); 3157 ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr); 3158 3159 ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr); 3160 3161 /* copy bb into slvec1a */ 3162 ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr); 3163 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 3164 ierr = PetscArraycpy(ptr,b,bs*mbs);CHKERRQ(ierr); 3165 ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr); 3166 3167 /* set slvec1b = 0 */ 3168 ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr); 3169 3170 ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3171 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 3172 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 3173 ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3174 3175 /* upper triangular part: bb1 = bb1 - B*x */ 3176 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr); 3177 3178 /* local diagonal sweep */ 3179 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 3180 } 3181 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 3182 } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 3183 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 3184 } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 3185 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 3186 } else if (flag & SOR_EISENSTAT) { 3187 Vec xx1; 3188 PetscBool hasop; 3189 const PetscScalar *diag; 3190 PetscScalar *sl,scale = (omega - 2.0)/omega; 3191 PetscInt i,n; 3192 3193 if (!mat->xx1) { 3194 ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr); 3195 ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr); 3196 } 3197 xx1 = mat->xx1; 3198 bb1 = mat->bb1; 3199 3200 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 3201 3202 if (!mat->diag) { 3203 /* this is wrong for same matrix with new nonzero values */ 3204 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 3205 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 3206 } 3207 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 3208 3209 if (hasop) { 3210 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 3211 ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr); 3212 } else { 3213 /* 3214 These two lines are replaced by code that may be a bit faster for a good compiler 3215 ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr); 3216 ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr); 3217 */ 3218 ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr); 3219 ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr); 3220 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 3221 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 3222 ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr); 3223 if (omega == 1.0) { 3224 for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i]; 3225 ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr); 3226 } else { 3227 for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i]; 3228 ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr); 3229 } 3230 ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr); 3231 ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr); 3232 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 3233 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 3234 } 3235 3236 /* multiply off-diagonal portion of matrix */ 3237 ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr); 3238 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 3239 ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr); 3240 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 3241 ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr); 3242 ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr); 3243 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 3244 ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3245 ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3246 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr); 3247 3248 /* local sweep */ 3249 ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 3250 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 3251 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 3252 PetscFunctionReturn(0); 3253 } 3254 3255 PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 3256 { 3257 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 3258 PetscErrorCode ierr; 3259 Vec lvec1,bb1; 3260 3261 PetscFunctionBegin; 3262 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 3263 if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 3264 3265 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 3266 if (flag & SOR_ZERO_INITIAL_GUESS) { 3267 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 3268 its--; 3269 } 3270 3271 ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr); 3272 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 3273 while (its--) { 3274 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3275 3276 /* lower diagonal part: bb1 = bb - B^T*xx */ 3277 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr); 3278 ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr); 3279 3280 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3281 ierr = VecCopy(bb,bb1);CHKERRQ(ierr); 3282 ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3283 3284 /* upper diagonal part: bb1 = bb1 - B*x */ 3285 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 3286 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 3287 3288 ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3289 3290 /* diagonal sweep */ 3291 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 3292 } 3293 ierr = VecDestroy(&lvec1);CHKERRQ(ierr); 3294 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 3295 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 3296 PetscFunctionReturn(0); 3297 } 3298 3299 /*@ 3300 MatCreateMPISBAIJWithArrays - creates a MPI SBAIJ matrix using arrays that contain in standard 3301 CSR format the local rows. 3302 3303 Collective 3304 3305 Input Parameters: 3306 + comm - MPI communicator 3307 . bs - the block size, only a block size of 1 is supported 3308 . m - number of local rows (Cannot be PETSC_DECIDE) 3309 . n - This value should be the same as the local size used in creating the 3310 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3311 calculated if N is given) For square matrices n is almost always m. 3312 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3313 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3314 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that row block row of the matrix 3315 . j - column indices 3316 - a - matrix values 3317 3318 Output Parameter: 3319 . mat - the matrix 3320 3321 Level: intermediate 3322 3323 Notes: 3324 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3325 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3326 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3327 3328 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3329 3330 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3331 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 3332 @*/ 3333 PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3334 { 3335 PetscErrorCode ierr; 3336 3337 3338 PetscFunctionBegin; 3339 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3340 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3341 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3342 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3343 ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr); 3344 ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr); 3345 PetscFunctionReturn(0); 3346 } 3347 3348 3349 /*@C 3350 MatMPISBAIJSetPreallocationCSR - Creates a sparse parallel matrix in SBAIJ format using the given nonzero structure and (optional) numerical values 3351 3352 Collective 3353 3354 Input Parameters: 3355 + B - the matrix 3356 . bs - the block size 3357 . i - the indices into j for the start of each local row (starts with zero) 3358 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3359 - v - optional values in the matrix 3360 3361 Level: advanced 3362 3363 Notes: 3364 Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries 3365 and usually the numerical values as well 3366 3367 Any entries below the diagonal are ignored 3368 3369 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ 3370 @*/ 3371 PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3372 { 3373 PetscErrorCode ierr; 3374 3375 PetscFunctionBegin; 3376 ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 3377 PetscFunctionReturn(0); 3378 } 3379 3380 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3381 { 3382 PetscErrorCode ierr; 3383 PetscInt m,N,i,rstart,nnz,Ii,bs,cbs; 3384 PetscInt *indx; 3385 PetscScalar *values; 3386 3387 PetscFunctionBegin; 3388 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 3389 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 3390 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inmat->data; 3391 PetscInt *dnz,*onz,mbs,Nbs,nbs; 3392 PetscInt *bindx,rmax=a->rmax,j; 3393 PetscMPIInt rank,size; 3394 3395 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 3396 mbs = m/bs; Nbs = N/cbs; 3397 if (n == PETSC_DECIDE) { 3398 ierr = PetscSplitOwnershipBlock(comm,cbs,&n,&N); 3399 } 3400 nbs = n/cbs; 3401 3402 ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr); 3403 ierr = MatPreallocateInitialize(comm,mbs,nbs,dnz,onz);CHKERRQ(ierr); /* inline function, output __end and __rstart are used below */ 3404 3405 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3406 ierr = MPI_Comm_rank(comm,&size);CHKERRQ(ierr); 3407 if (rank == size-1) { 3408 /* Check sum(nbs) = Nbs */ 3409 if (__end != Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local block columns %D != global block columns %D",__end,Nbs); 3410 } 3411 3412 rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateInitialize */ 3413 ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 3414 for (i=0; i<mbs; i++) { 3415 ierr = MatGetRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */ 3416 nnz = nnz/bs; 3417 for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs; 3418 ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr); 3419 ierr = MatRestoreRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); 3420 } 3421 ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr); 3422 ierr = PetscFree(bindx);CHKERRQ(ierr); 3423 3424 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3425 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3426 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 3427 ierr = MatSetType(*outmat,MATSBAIJ);CHKERRQ(ierr); 3428 ierr = MatSeqSBAIJSetPreallocation(*outmat,bs,0,dnz);CHKERRQ(ierr); 3429 ierr = MatMPISBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr); 3430 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3431 } 3432 3433 /* numeric phase */ 3434 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 3435 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 3436 3437 ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 3438 for (i=0; i<m; i++) { 3439 ierr = MatGetRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3440 Ii = i + rstart; 3441 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3442 ierr = MatRestoreRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3443 } 3444 ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr); 3445 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3446 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3447 PetscFunctionReturn(0); 3448 } 3449