#include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/
#include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
#include <../src/mat/impls/sbaij/seq/sbaij.h>
#include <petscblaslapack.h>

#if defined(PETSC_HAVE_ELEMENTAL)
PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
#endif
#if defined(PETSC_HAVE_SCALAPACK)
PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*);
#endif

/* This could be moved to matimpl.h */
/*
   Preallocates B using the union of the nonzero patterns of the nm matrices X[].
   A MATPREALLOCATOR matrix of the same layout is filled with the rows of each X[i]
   (and, when symm[i] is true, with the transposed entries as well) and then used
   to preallocate B via MatPreallocatorPreallocate().

   B    - the matrix to preallocate (row layout must match every X[i])
   nm   - number of pattern matrices
   X    - the pattern matrices
   symm - per-matrix flag: also insert the transpose of each row (may be NULL)
   fill - passed to MatPreallocatorPreallocate(); when true the values are copied in
*/
static PetscErrorCode MatPreallocateWithMats_Private(Mat B, PetscInt nm, Mat X[], PetscBool symm[], PetscBool fill)
{
  Mat            preallocator;
  PetscInt       r,rstart,rend;
  PetscInt       bs,i,m,n,M,N;
  PetscBool      cong = PETSC_TRUE;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidLogicalCollectiveInt(B,nm,2);
  for (i = 0; i < nm; i++) {
    PetscValidHeaderSpecific(X[i],MAT_CLASSID,3);
    /* all pattern matrices must share B's row layout, otherwise row indices would not line up */
    ierr = PetscLayoutCompare(B->rmap,X[i]->rmap,&cong);CHKERRQ(ierr);
    if (!cong) SETERRQ(PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"Not for different layouts");
  }
  PetscValidLogicalCollectiveBool(B,fill,5);
  ierr = MatGetBlockSize(B,&bs);CHKERRQ(ierr);
  ierr = MatGetSize(B,&M,&N);CHKERRQ(ierr);
  ierr = MatGetLocalSize(B,&m,&n);CHKERRQ(ierr);
  /* build a preallocator matrix with the same shape/block size as B */
  ierr = MatCreate(PetscObjectComm((PetscObject)B),&preallocator);CHKERRQ(ierr);
  ierr = MatSetType(preallocator,MATPREALLOCATOR);CHKERRQ(ierr);
  ierr = MatSetBlockSize(preallocator,bs);CHKERRQ(ierr);
  ierr = MatSetSizes(preallocator,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetUp(preallocator);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(preallocator,&rstart,&rend);CHKERRQ(ierr);
  for (r = rstart; r < rend; ++r) {
    PetscInt          ncols;
    const PetscInt    *row;
    const PetscScalar *vals;

    for (i = 0; i < nm; i++) {
      ierr = MatGetRow(X[i],r,&ncols,&row,&vals);CHKERRQ(ierr);
      ierr = MatSetValues(preallocator,1,&r,ncols,row,vals,INSERT_VALUES);CHKERRQ(ierr);
      if (symm && symm[i]) {
        /* also mark the transposed locations so the full (symmetric) pattern is preallocated */
        ierr = MatSetValues(preallocator,ncols,row,1,&r,vals,INSERT_VALUES);CHKERRQ(ierr);
      }
      ierr = MatRestoreRow(X[i],r,&ncols,&row,&vals);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(preallocator,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(preallocator,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatPreallocatorPreallocate(preallocator,fill,B);CHKERRQ(ierr);
  ierr = MatDestroy(&preallocator);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   Fallback conversion from MPISBAIJ (upper-triangular block storage) to a full-storage
   type: every stored entry (i,j) is inserted into B at both (i,j) and (j,i); for complex
   Hermitian matrices the mirrored entry is conjugated.  Handles MAT_INITIAL_MATRIX,
   MAT_REUSE_MATRIX and MAT_INPLACE_MATRIX (via MatHeaderReplace()).
*/
PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Basic(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat            B;
  PetscErrorCode ierr;
  PetscInt       r;

  PetscFunctionBegin;
  if (reuse != MAT_REUSE_MATRIX) {
    PetscBool symm = PETSC_TRUE,isdense;
    PetscInt  bs;

    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
    ierr = MatSetType(B,newtype);CHKERRQ(ierr);
    ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
    ierr = MatSetBlockSize(B,bs);CHKERRQ(ierr);
    ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
    ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
    ierr = PetscObjectTypeCompareAny((PetscObject)B,&isdense,MATSEQDENSE,MATMPIDENSE,MATSEQDENSECUDA,"");CHKERRQ(ierr);
    if (!isdense) {
      /* sparse target: preallocate with the symmetrized pattern of A */
      ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
      ierr = MatPreallocateWithMats_Private(B,1,&A,&symm,PETSC_TRUE);CHKERRQ(ierr);
      ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
    } else {
      /* dense target needs no preallocation */
      ierr = MatSetUp(B);CHKERRQ(ierr);
    }
  } else {
    B    = *newmat;
    ierr = MatZeroEntries(B);CHKERRQ(ierr);
  }

  ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
  for (r = A->rmap->rstart; r < A->rmap->rend; r++) {
    PetscInt          ncols;
    const PetscInt    *row;
    const PetscScalar *vals;

    ierr = MatGetRow(A,r,&ncols,&row,&vals);CHKERRQ(ierr);
    /* insert the stored (upper-triangular) entries ... */
    ierr = MatSetValues(B,1,&r,ncols,row,vals,INSERT_VALUES);CHKERRQ(ierr);
#if defined(PETSC_USE_COMPLEX)
    if (A->hermitian) {
      PetscInt i;
      /* ... and their conjugates mirrored below the diagonal */
      for (i = 0; i < ncols; i++) {
        ierr = MatSetValue(B,row[i],r,PetscConj(vals[i]),INSERT_VALUES);CHKERRQ(ierr);
      }
    } else {
      ierr = MatSetValues(B,ncols,row,1,&r,vals,INSERT_VALUES);CHKERRQ(ierr);
    }
#else
    /* real case: mirror the row as a column */
    ierr = MatSetValues(B,ncols,row,1,&r,vals,INSERT_VALUES);CHKERRQ(ierr);
#endif
    ierr = MatRestoreRow(A,r,&ncols,&row,&vals);CHKERRQ(ierr);
  }
  ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  if (reuse == MAT_INPLACE_MATRIX) {
    ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr);
  } else {
    *newmat = B;
  }
  PetscFunctionReturn(0);
}

/* Store the current numerical values of both local parts (diagonal block A and
   off-diagonal block B) so they can later be restored with MatRetrieveValues() */
PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Restore the numerical values previously saved with MatStoreValues() */
PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   Insert/add one scalar into the local diagonal (SeqSBAIJ) part.  Relies on the
   many local variables declared in MatSetValues_MPISBAIJ() (rp, ap, aa, ai, aj,
   aimax, ailen, bs, bs2, ...).  Does a short binary search then a linear scan over
   the block row; reallocates the row when a new block must be inserted.
*/
#define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,orow,ocol) \
  { \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value; \
        goto \
a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    ierr = PetscArraymove(rp+_i+1,rp+_i,N-_i+1);CHKERRQ(ierr); \
    ierr = PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1));CHKERRQ(ierr); \
    ierr = PetscArrayzero(ap+bs2*_i,bs2);CHKERRQ(ierr); \
    rp[_i]                      = bcol; \
    ap[bs2*_i + bs*cidx + ridx] = value; \
    A->nonzerostate++; \
    a_noinsert:; \
    ailen[brow] = nrow; \
  }

/*
   Insert/add one scalar into the local off-diagonal (SeqBAIJ) part; same structure
   as MatSetValues_SeqSBAIJ_A_Private() but operating on the b*/bi/bj/ba arrays of B.
*/
#define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,orow,ocol) \
  { \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    ierr = PetscArraymove(rp+_i+1,rp+_i,N-_i+1);CHKERRQ(ierr); \
    ierr = PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1));CHKERRQ(ierr); \
    ierr = PetscArrayzero(ap+bs2*_i,bs2);CHKERRQ(ierr); \
    rp[_i]                      = bcol; \
    ap[bs2*_i + bs*cidx + ridx] = value; \
    B->nonzerostate++; \
    b_noinsert:; \
    bilen[brow] = nrow; \
  }

/* Only add/insert a(i,j) with i<=j (blocks).
   Any a(i,j) with i>j input by user is ignored or generates an error
*/
PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  MatScalar      value;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat          A     = baij->A;
  Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ*)(A)->data;
  PetscInt     *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar    *aa   =a->a;

  Mat         B     = baij->B;
  Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
  PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba   =b->a;

  PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar *ap,*bap;

  /* for stash */
  PetscInt  n_loc, *in_loc = NULL;
  MatScalar *v_loc = NULL;

  PetscFunctionBegin;
  if (!baij->donotstash) {
    /* grow the cached scratch arrays used to filter lower-triangular entries
       before stashing off-process rows */
    if (n > baij->n_loc) {
      ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
      ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
      ierr = PetscMalloc1(n,&baij->in_loc);CHKERRQ(ierr);
      ierr = PetscMalloc1(n,&baij->v_loc);CHKERRQ(ierr);

      baij->n_loc = n;
    }
    in_loc = baij->in_loc;
    v_loc  = baij->v_loc;
  }

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue; /* negative row indices are silently skipped */
    if (PetscUnlikelyDebug(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
    if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
      row = im[i] - rstart_orig; /* local row index */
      for (j=0; j<n; j++) {
        if (im[i]/bs > in[j]/bs) {
          /* entry lies in a strictly lower-triangular block: not representable in SBAIJ */
          if (a->ignore_ltriangular) {
            continue; /* ignore lower triangular blocks */
          } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
        }
        if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */
          col  = in[j] - cstart_orig; /* local col index */
          brow = row/bs; bcol = col/bs;
          if (brow > bcol) continue; /* ignore lower triangular blocks of A */
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,im[i],in[j]);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
        else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
        else { /* off-diag entry (B) */
          if (mat->was_assembled) {
            /* translate the global column through the compacted colmap of B */
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
              /* column not present in the assembled pattern: fall back to global indexing */
              ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B    = baij->B;
              b    = (Mat_SeqBAIJ*)(B)->data;
              bimax= b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba   = b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,im[i],in[j]);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else { /* off processor entry */
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        n_loc          = 0;
        /* copy only the upper-triangular entries into the scratch arrays, then stash */
        for (j=0; j<n; j++) {
          if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
          in_loc[n_loc] = in[j];
          if (roworiented) {
            v_loc[n_loc] = v[i*n+j];
          } else {
            v_loc[n_loc] = v[j*m+i];
          }
          n_loc++;
        }
        ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}

/*
   Insert/add one bs x bs block (column-major in the matrix storage) into a
   SeqSBAIJ matrix at block position (row,col); lower-triangular blocks are
   ignored or rejected depending on a->ignore_ltriangular.
*/
PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
{
  Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
  PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
  PetscInt          *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
  PetscBool         roworiented=a->roworiented;
  const PetscScalar *value = v;
  MatScalar         *ap,*aa = a->a,*bap;

  PetscFunctionBegin;
  if (col < row) {
    if (a->ignore_ltriangular) PetscFunctionReturn(0); /* ignore lower triangular block */
    else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. 
Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
  }
  rp    = aj + ai[row];
  ap    = aa + bs2*ai[row];
  rmax  = imax[row];
  nrow  = ailen[row];
  value = v;
  low   = 0;
  high  = nrow;

  /* binary search narrows the window, then a linear scan finds the block column */
  while (high-low > 7) {
    t = (low+high)/2;
    if (rp[t] > col) high = t;
    else             low  = t;
  }
  for (i=low; i<high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      /* block already present: accumulate or overwrite in place */
      bap = ap + bs2*i;
      if (roworiented) {
        /* input block is row-major; storage is column-major, hence the strided jj loop */
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] += *value++;
            }
          }
        } else {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] = *value++;
            }
          }
        }
      } else {
        /* input block already column-major: contiguous copy/accumulate */
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++) {
            for (jj=0; jj<bs; jj++) {
              *bap++ += *value++;
            }
          }
        } else {
          for (ii=0; ii<bs; ii++) {
            for (jj=0; jj<bs; jj++) {
              *bap++ = *value++;
            }
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2; /* new blocks silently ignored */
  if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new block index nonzero block (%D, %D) in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
  N = nrow++ - 1; high++;
  /* shift up all the later entries in this row */
  ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr);
  ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr);
  rp[i] = col;
  bap   = ap + bs2*i;
  if (roworiented) {
    for (ii=0; ii<bs; ii++) {
      for (jj=ii; jj<bs2; jj+=bs) {
        bap[jj] = *value++;
      }
    }
  } else {
    for (ii=0; ii<bs; ii++) {
      for (jj=0; jj<bs; jj++) {
        *bap++ = *value++;
      }
    }
  }
  noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(0);
}

/*
   This routine is exactly duplicated in mpibaij.c
*/
/* Insert/add one bs x bs block into a SeqBAIJ matrix (no symmetry restriction). */
PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
{
  Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
  PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
  PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
  PetscErrorCode    ierr;
  PetscInt          *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
  PetscBool         roworiented=a->roworiented;
  const PetscScalar *value = v;
  MatScalar         *ap,*aa = a->a,*bap;

  PetscFunctionBegin;
  rp    = aj + ai[row];
  ap    = aa + bs2*ai[row];
  rmax  = imax[row];
  nrow  = ailen[row];
  low   = 0;
  high  = nrow;
  value = v;
  while (high-low > 7) {
    t = (low+high)/2;
    if (rp[t] > col) high = t;
    else             low  = t;
  }
  for (i=low; i<high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      bap = ap + bs2*i;
      if (roworiented) {
        /* row-major input into column-major storage */
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] += *value++;
            }
          }
        } else {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] = *value++;
            }
          }
        }
      } else {
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++,value+=bs) {
            for (jj=0; jj<bs; jj++) {
              bap[jj] += value[jj];
            }
            bap += bs;
          }
        } else {
          for (ii=0; ii<bs; ii++,value+=bs) {
            for (jj=0; jj<bs; jj++) {
              bap[jj] = value[jj];
            }
            bap += bs;
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2;
  if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%D, %D) in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
  N = nrow++ - 1; high++;
  /* shift up all the later entries in this row */
  ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr);
  ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr);
  rp[i] = col;
  bap   = ap + bs2*i;
  if (roworiented) {
    for (ii=0; ii<bs; ii++) {
      for (jj=ii; jj<bs2; jj+=bs) {
        bap[jj] = *value++;
      }
    }
  } else {
    for (ii=0; ii<bs; ii++) {
      for (jj=0; jj<bs; jj++) {
        *bap++ = *value++;
      }
    }
  }
  noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(0);
}

/*
  This routine could be optimized by removing the need for the block copy below and passing stride information
  to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ()
*/
PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray     = baij->barray;
  PetscBool       roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular;
  PetscErrorCode  ierr;
  PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    /* lazily allocate the bs x bs scratch block used for layout conversion */
    ierr = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
    if (PetscUnlikelyDebug(im[i] >= baij->Mbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %D max %D",im[i],baij->Mbs-1);
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        if (im[i] > in[j]) {
          /* strictly lower-triangular block: not representable in SBAIJ */
          if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
          else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. 
Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
        }
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*) v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*) v + j*bs2;
        } else { /* Here a copy is required */
          /* gather the (i,j)-th bs x bs block out of the strided user array */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++ = *value++;
            }
          }
          barray -= bs2; /* rewind to the start of the scratch block */
        }

        if (in[j] >= cstart && in[j] < cend) {
          /* block column owned locally: goes into the diagonal part A */
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        } else if (in[j] < 0) continue;
        else if (PetscUnlikelyDebug(in[j] >= baij->Nbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %D max %D",in[j],baij->Nbs-1);
        else {
          /* off-diagonal part B: map the global block column through the colmap */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined(PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              /* block column absent from assembled pattern: disassemble and use global index */
              ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            }
          } else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        }
      }
    } else {
      /* off-process block row: stash for communication during assembly */
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

/*
   Fetch entries (idxm[i],idxn[j]) into v (row-major, v[i*n+j]).  Only rows owned by
   this process are supported; entries in unowned columns that are not stored in the
   off-diagonal part come back as 0.0.  Negative indices are skipped.
*/
PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          /* locally-owned column: read from the diagonal part A */
          col  = idxn[j] - bscstart;
          ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          /* off-process column: translate through the colmap into B */
          if (!baij->colmap) {
            ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr =
PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 653 data--; 654 #else 655 data = baij->colmap[idxn[j]/bs]-1; 656 #endif 657 if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 658 else { 659 col = data + idxn[j]%bs; 660 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 661 } 662 } 663 } 664 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 665 } 666 PetscFunctionReturn(0); 667 } 668 669 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm) 670 { 671 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 672 PetscErrorCode ierr; 673 PetscReal sum[2],*lnorm2; 674 675 PetscFunctionBegin; 676 if (baij->size == 1) { 677 ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr); 678 } else { 679 if (type == NORM_FROBENIUS) { 680 ierr = PetscMalloc1(2,&lnorm2);CHKERRQ(ierr); 681 ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr); 682 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++; /* squar power of norm(A) */ 683 ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr); 684 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--; /* squar power of norm(B) */ 685 ierr = MPIU_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 686 *norm = PetscSqrtReal(sum[0] + 2*sum[1]); 687 ierr = PetscFree(lnorm2);CHKERRQ(ierr); 688 } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */ 689 Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data; 690 Mat_SeqBAIJ *bmat=(Mat_SeqBAIJ*)baij->B->data; 691 PetscReal *rsum,*rsum2,vabs; 692 PetscInt *jj,*garray=baij->garray,rstart=baij->rstartbs,nz; 693 PetscInt brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs; 694 MatScalar *v; 695 696 ierr = PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2);CHKERRQ(ierr); 697 ierr = PetscArrayzero(rsum,mat->cmap->N);CHKERRQ(ierr); 698 /* Amat */ 699 v = amat->a; jj = amat->j; 700 for (brow=0; brow<mbs; brow++) { 701 grow = bs*(rstart + brow); 702 nz = amat->i[brow+1] - 
amat->i[brow]; 703 for (bcol=0; bcol<nz; bcol++) { 704 gcol = bs*(rstart + *jj); jj++; 705 for (col=0; col<bs; col++) { 706 for (row=0; row<bs; row++) { 707 vabs = PetscAbsScalar(*v); v++; 708 rsum[gcol+col] += vabs; 709 /* non-diagonal block */ 710 if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs; 711 } 712 } 713 } 714 ierr = PetscLogFlops(nz*bs*bs);CHKERRQ(ierr); 715 } 716 /* Bmat */ 717 v = bmat->a; jj = bmat->j; 718 for (brow=0; brow<mbs; brow++) { 719 grow = bs*(rstart + brow); 720 nz = bmat->i[brow+1] - bmat->i[brow]; 721 for (bcol=0; bcol<nz; bcol++) { 722 gcol = bs*garray[*jj]; jj++; 723 for (col=0; col<bs; col++) { 724 for (row=0; row<bs; row++) { 725 vabs = PetscAbsScalar(*v); v++; 726 rsum[gcol+col] += vabs; 727 rsum[grow+row] += vabs; 728 } 729 } 730 } 731 ierr = PetscLogFlops(nz*bs*bs);CHKERRQ(ierr); 732 } 733 ierr = MPIU_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 734 *norm = 0.0; 735 for (col=0; col<mat->cmap->N; col++) { 736 if (rsum2[col] > *norm) *norm = rsum2[col]; 737 } 738 ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr); 739 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet"); 740 } 741 PetscFunctionReturn(0); 742 } 743 744 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode) 745 { 746 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 747 PetscErrorCode ierr; 748 PetscInt nstash,reallocs; 749 750 PetscFunctionBegin; 751 if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 752 753 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 754 ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr); 755 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 756 ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 757 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 758 ierr = 
PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   Finish assembly: drain both stashes into the local parts, assemble A and B,
   handle collective disassembly when any rank inserted into a new off-diagonal
   location, set up the scatter contexts on first final assembly, and update the
   collective nonzero state.
*/
PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
  Mat_SeqSBAIJ   *a   =(Mat_SeqSBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscMPIInt    n;
  PetscBool      r1,r2,r3;
  MatScalar      *val;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!baij->donotstash && !mat->nooffprocentries) {
    /* drain the scalar stash */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);

    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselfs, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    /* MPI_PROD of was_assembled: false on any rank forces everyone to disassemble */
    ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);

  baij->rowvalues = 0;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);

#include <petscdraw.h>
/* Viewer backend for ASCII/draw/socket viewers; detailed ASCII formats are handled
   per-rank, otherwise the whole matrix is gathered onto rank 0 for display. */
static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscInt          bs = mat->rmap->bs;
  PetscMPIInt       rank = baij->rank;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-rank nonzero/memory statistics, printed synchronized so output is ordered by rank */
      MatInfo info;
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %g\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(double)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    Mat_SeqSBAIJ *Aloc;
    Mat_SeqBAIJ  *Bloc;
    PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar    *a;
    const char   *matname;

    /* Should this be the same type as mat? */
    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      /* rank 0 owns the whole gathered matrix; all other ranks own zero rows */
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
    ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* copy over the A (on-diagonal, SBAIJ) part, one bs-column of each block at a time */
    Aloc = (Mat_SeqSBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);            /* global row of first point row in block row i */
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;             /* local block column -> global point column */
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++;
          a += bs;
        }
      }
    }
    /* copy over the B (off-diagonal, BAIJ) part; garray maps local to global block columns.
       NOTE(review): this deliberately uses the non-symmetric MatSetValues_MPIBAIJ path — confirm */
    Bloc = (Mat_SeqBAIJ*)baij->B->data;
    ai   = Bloc->i; aj = Bloc->j; a = Bloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++;
          a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,matname);CHKERRQ(ierr);
      ierr = MatView_SeqSBAIJ(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* Used for both MPIBAIJ and MPISBAIJ matrices */
#define MatView_MPISBAIJ_Binary MatView_MPIBAIJ_Binary

/* Dispatch MatView() on an MPISBAIJ matrix to the viewer-type-specific implementation. */
PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii || isdraw || issocket) {
    ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPISBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* Free all storage owned by an MPISBAIJ matrix: the diagonal (A) and off-diagonal (B)
   sequential blocks, the column map, the scatter contexts and work vectors, and the
   composed type-conversion functions. */
PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr);
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree(baij->hd);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->diag);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr);
#if defined(PETSC_USE_REAL_MAT_SINGLE)
  ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
  ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  /* detach composed methods so a later MatSetType() starts clean */
  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr =
PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_elemental_C",NULL);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_scalapack_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpiaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpibaij_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* y = A*x for a Hermitian MPISBAIJ matrix: only the upper triangle is stored, so the
   lower-triangle contribution is obtained by applying B's Hermitian transpose and
   combining both halves through the sMvctx scatter. */
PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscInt          mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar       *from;
  const PetscScalar *x;

  PetscFunctionBegin;
  /* diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);

  ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* y = A*x for a (real-)symmetric MPISBAIJ matrix; identical to the Hermitian variant
   except the subdiagonal contribution uses B's plain transpose. */
PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscInt          mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar       *from;
  const PetscScalar *x;

  PetscFunctionBegin;
  /* diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);

  ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* z = y + A*x, Hermitian storage variant. */
PetscErrorCode MatMultAdd_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscInt          mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar       *from,zero=0.0;
  const PetscScalar *x;

  PetscFunctionBegin;
  /* diagonal part */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
  ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);

  /* xx's array is released only after the scatter has been started */
  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* z = y + A*x, symmetric storage variant (plain transpose for the subdiagonal part). */
PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscInt          mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar       *from,zero=0.0;
  const PetscScalar *x;

  PetscFunctionBegin;
  /* diagonal part */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
  ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode
MatScale_MPISBAIJ(Mat A,PetscScalar aa)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* scale both the on-diagonal (A) and off-diagonal (B) local blocks */
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Return one locally-owned (point) row of the parallel matrix, merging the rows of the
   on-diagonal A block and the off-diagonal B block into a single, column-sorted row.
   Results point into per-matrix scratch arrays (rowvalues/rowindices), so only one
   MatGetRow may be active at a time (guarded by getrowactive). */
PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
    Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt     max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
      if (max < tmp) max = tmp;
    }
    ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
  }

  if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
  lrow = row - brstart;  /* local row index */

  /* request columns and/or values from A and B only if the caller asked for them */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      /* imark = number of B entries whose global block column precedes the diagonal block */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Companion of MatGetRow_MPISBAIJ(): just clears the in-use flag (results live in
   per-matrix scratch arrays, so nothing needs to be freed here). */
PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;

  PetscFunctionBegin;
  if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/* Enable returning only the stored (upper-triangular) part from MatGetRow on the A block. */
PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;

  PetscFunctionBegin;
  aA->getrow_utriangular = PETSC_TRUE;
  PetscFunctionReturn(0);
}

/* Companion of MatGetRowUpperTriangular_MPISBAIJ(): restore the default MatGetRow behavior. */
PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;

  PetscFunctionBegin;
  aA->getrow_utriangular = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/* Replace every stored value by its real part, in both local blocks. */
PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  ierr = MatRealPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Replace every stored value by its imaginary part, in both local blocks. */
PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Check if isrow is a subset of iscol_local, called by MatCreateSubMatrix_MPISBAIJ()
   Input: isrow       - distributed(parallel),
          iscol_local - locally owned (seq)
*/
PetscErrorCode ISEqual_private(IS isrow,IS iscol_local,PetscBool *flg)
{
  PetscErrorCode ierr;
  PetscInt       sz1,sz2,*a1,*a2,i,j,k,nmatch;
  const PetscInt *ptr1,*ptr2;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(isrow,&sz1);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol_local,&sz2);CHKERRQ(ierr);
  if (sz1 > sz2) {
    /* isrow cannot be a subset of a smaller set */
    *flg = PETSC_FALSE;
    PetscFunctionReturn(0);
  }

  ierr = ISGetIndices(isrow,&ptr1);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol_local,&ptr2);CHKERRQ(ierr);

  /* work on sorted copies so the subset test is a single merge-style pass */
  ierr = PetscMalloc1(sz1,&a1);CHKERRQ(ierr);
  ierr = PetscMalloc1(sz2,&a2);CHKERRQ(ierr);
  ierr = PetscArraycpy(a1,ptr1,sz1);CHKERRQ(ierr);
  ierr = PetscArraycpy(a2,ptr2,sz2);CHKERRQ(ierr);
  ierr = PetscSortInt(sz1,a1);CHKERRQ(ierr);
  ierr = PetscSortInt(sz2,a2);CHKERRQ(ierr);

  nmatch=0;
  k     = 0;
  for (i=0; i<sz1; i++){
    for (j=k; j<sz2; j++){
      if (a1[i] == a2[j]) {
        k = j; nmatch++;
        break;
      }
    }
  }
  ierr = ISRestoreIndices(isrow,&ptr1);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol_local,&ptr2);CHKERRQ(ierr);
  ierr = PetscFree(a1);CHKERRQ(ierr);
  ierr = PetscFree(a2);CHKERRQ(ierr);
  if (nmatch < sz1) {
    *flg = PETSC_FALSE;
  } else {
    *flg = PETSC_TRUE;
  }
  PetscFunctionReturn(0);
}

/* Extract a parallel submatrix. The symmetric format only supports symmetric extraction
   (iscol must equal isrow); the gathered column IS is cached on the new matrix so it can
   be reused on a MAT_REUSE_MATRIX call. */
PetscErrorCode MatCreateSubMatrix_MPISBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  IS             iscol_local;
  PetscInt       csize;
  PetscBool      isequal;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
  if (call == MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
    if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
  } else {
    ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
    ierr = ISEqual_private(isrow,iscol_local,&isequal);CHKERRQ(ierr);
    if (!isequal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isrow");
  }

  /* now call MatCreateSubMatrix_MPIBAIJ() */
  ierr = MatCreateSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
    ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* Zero all stored values (nonzero pattern is kept) in both local blocks. */
PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Gather matrix statistics: sums the A and B block info locally, then reduces with
   MPI_MAX or MPI_SUM across the communicator depending on the requested flag. */
PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
  Mat            A  = a->A,B = a->B;
  PetscErrorCode ierr;
  PetscLogDouble isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = (PetscReal)matin->rmap->bs;

  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;

  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

/* Set a matrix option, forwarding to the local A and/or B blocks as appropriate. */
PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPISBAIJ   *a  = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_SUBMAT_SINGLEIS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A,1);
    a->roworiented = flg;

    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
  case MAT_SORTED_FULL:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    a->ht_flag = flg;
    break;
  case MAT_HERMITIAN:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
#if defined(PETSC_USE_COMPLEX)
    if (flg) { /* need different
mat-vec ops */
      A->ops->mult             = MatMult_MPISBAIJ_Hermitian;
      A->ops->multadd          = MatMultAdd_MPISBAIJ_Hermitian;
      A->ops->multtranspose    = NULL;
      A->ops->multtransposeadd = NULL;
      A->symmetric = PETSC_FALSE;
    }
#endif
    break;
  case MAT_SPD:
  case MAT_SYMMETRIC:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
#if defined(PETSC_USE_COMPLEX)
    if (flg) { /* restore to use default mat-vec ops */
      A->ops->mult             = MatMult_MPISBAIJ;
      A->ops->multadd          = MatMultAdd_MPISBAIJ;
      /* symmetric matrix: transpose ops coincide with the plain ops */
      A->ops->multtranspose    = MatMult_MPISBAIJ;
      A->ops->multtransposeadd = MatMultAdd_MPISBAIJ;
    }
#endif
    break;
  case MAT_STRUCTURALLY_SYMMETRIC:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_SYMMETRY_ETERNAL:
    if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_LOWER_TRIANGULAR:
    aA->ignore_ltriangular = flg;
    break;
  case MAT_ERROR_LOWER_TRIANGULAR:
    /* NOTE(review): this also sets ignore_ltriangular, same as the case above —
       confirm whether a distinct error flag was intended here */
    aA->ignore_ltriangular = flg;
    break;
  case MAT_GETROW_UPPERTRIANGULAR:
    aA->getrow_utriangular = flg;
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}

/* For a symmetric matrix the transpose equals the matrix itself, so this is a copy. */
PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
  } else if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatCopy(A,*B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* Diagonal scaling L*A*R. Symmetry requires L == R, enforced up front; the off-diagonal
   block is scaled on the left immediately and on the right after the scatter of rr
   into lvec completes. */
PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  Mat            a = baij->A, b=baij->B;
  PetscErrorCode ierr;
  PetscInt       nv,m,n;
  PetscBool      flg;

  PetscFunctionBegin;
  if (ll != rr) {
    ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
    if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
  }
  if (!ll) PetscFunctionReturn(0);

  ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
  if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n);

  ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
  if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");

  ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* left diagonalscale the off-diagonal part */
  ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);

  /* scale the diagonal part */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  /* right diagonalscale the off-diagonal part */
  ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Clear the factored state of the local diagonal block. */
PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*);

/* Collective equality test: compare the local A and B blocks, then AND across ranks. */
PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool *flag)
{
  Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
  Mat            a,b,c,d;
  PetscBool      flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Copy A into B; uses the fast block-wise copy when both matrices share the same
   nonzero pattern and copy implementation, otherwise falls back to MatCopy_Basic. */
PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
{
  PetscErrorCode ierr;
  PetscBool      isbaij;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompareAny((PetscObject)B,&isbaij,MATSEQSBAIJ,MATMPISBAIJ,"");CHKERRQ(ierr);
  if (!isbaij) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"Not for matrix type %s",((PetscObject)B)->type_name);
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
    ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
    ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
  } else {
    Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
    Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)B->data;

    ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
    ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
  }
  ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Default MatSetUp: preallocate with default (heuristic) nonzero counts. */
PetscErrorCode MatSetUp_MPISBAIJ(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Y = Y + a*X. For SAME_NONZERO_PATTERN this is a pair of raw BLAS axpy calls on the
   A and B value arrays; otherwise a new matrix with the merged pattern is built. */
PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data;
  PetscBLASInt   bnz,one=1;
  Mat_SeqSBAIJ   *xa,*ya;
  Mat_SeqBAIJ    *xb,*yb;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    xa   = (Mat_SeqSBAIJ*)xx->A->data;
    ya   = (Mat_SeqSBAIJ*)yy->A->data;
    ierr =
PetscBLASIntCast(xa->nz,&bnz);CHKERRQ(ierr);
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one));
    xb   = (Mat_SeqBAIJ*)xx->B->data;
    yb   = (Mat_SeqBAIJ*)yy->B->data;
    ierr = PetscBLASIntCast(xb->nz,&bnz);CHKERRQ(ierr);
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one));
    ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
    ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
    ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
  } else {
    /* different patterns: build a new matrix B preallocated for the union pattern,
       compute B = Y + a*X there, then replace Y's guts with B's */
    Mat      B;
    PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs;
    if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
    ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
    ierr = MatGetRowUpperTriangular(Y);CHKERRQ(ierr);
    ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
    ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
    ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
    ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
    ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
    ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
    ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_SeqSBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
    ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
    ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
    ierr = PetscFree(nnz_d);CHKERRQ(ierr);
    ierr = PetscFree(nnz_o);CHKERRQ(ierr);
    ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
    ierr = MatRestoreRowUpperTriangular(Y);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* Extract sequential submatrices; any submatrix whose row and column index sets differ
   is no longer symmetric, so its symmetric-only operations are disabled. */
PetscErrorCode MatCreateSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
{
  PetscErrorCode ierr;
  PetscInt       i;
  PetscBool      flg;

  PetscFunctionBegin;
  ierr = MatCreateSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr); /* B[] are sbaij matrices */
  for (i=0; i<n; i++) {
    ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
    if (!flg) {
      ierr = MatSeqSBAIJZeroOps_Private(*B[i]);CHKERRQ(ierr);
    }
  }
  PetscFunctionReturn(0);
}

/* Y = Y + a*I. Ensures the diagonal is allocated first (preserving the nonew setting
   of an empty preallocated A block), then defers to MatShift_Basic. */
PetscErrorCode MatShift_MPISBAIJ(Mat Y,PetscScalar a)
{
  PetscErrorCode ierr;
  Mat_MPISBAIJ   *maij = (Mat_MPISBAIJ*)Y->data;
  Mat_SeqSBAIJ   *aij  = (Mat_SeqSBAIJ*)maij->A->data;

  PetscFunctionBegin;
  if (!Y->preallocated) {
    ierr = MatMPISBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);CHKERRQ(ierr);
  } else if (!aij->nz) {
    PetscInt nonew = aij->nonew;
    ierr = MatSeqSBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL);CHKERRQ(ierr);
    aij->nonew = nonew;  /* re-preallocation resets nonew; restore the caller's setting */
  }
  ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Report whether the local diagonal block is missing a diagonal entry; the returned
   block-row index d is shifted to the global block numbering. */
PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A,PetscBool *missing,PetscInt *d)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
  ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
  if (d) {
    PetscInt rstart;
    ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
    *d += rstart/A->rmap->bs;  /* local block row -> global block row */
  }
  PetscFunctionReturn(0);
}

/* Return (without copying) the local on-diagonal SBAIJ block. */
PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a)
{
  PetscFunctionBegin;
  *a = ((Mat_MPISBAIJ*)A->data)->A;
PetscFunctionReturn(0); 1781 } 1782 1783 /* -------------------------------------------------------------------*/ 1784 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ, 1785 MatGetRow_MPISBAIJ, 1786 MatRestoreRow_MPISBAIJ, 1787 MatMult_MPISBAIJ, 1788 /* 4*/ MatMultAdd_MPISBAIJ, 1789 MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */ 1790 MatMultAdd_MPISBAIJ, 1791 0, 1792 0, 1793 0, 1794 /* 10*/ 0, 1795 0, 1796 0, 1797 MatSOR_MPISBAIJ, 1798 MatTranspose_MPISBAIJ, 1799 /* 15*/ MatGetInfo_MPISBAIJ, 1800 MatEqual_MPISBAIJ, 1801 MatGetDiagonal_MPISBAIJ, 1802 MatDiagonalScale_MPISBAIJ, 1803 MatNorm_MPISBAIJ, 1804 /* 20*/ MatAssemblyBegin_MPISBAIJ, 1805 MatAssemblyEnd_MPISBAIJ, 1806 MatSetOption_MPISBAIJ, 1807 MatZeroEntries_MPISBAIJ, 1808 /* 24*/ 0, 1809 0, 1810 0, 1811 0, 1812 0, 1813 /* 29*/ MatSetUp_MPISBAIJ, 1814 0, 1815 0, 1816 MatGetDiagonalBlock_MPISBAIJ, 1817 0, 1818 /* 34*/ MatDuplicate_MPISBAIJ, 1819 0, 1820 0, 1821 0, 1822 0, 1823 /* 39*/ MatAXPY_MPISBAIJ, 1824 MatCreateSubMatrices_MPISBAIJ, 1825 MatIncreaseOverlap_MPISBAIJ, 1826 MatGetValues_MPISBAIJ, 1827 MatCopy_MPISBAIJ, 1828 /* 44*/ 0, 1829 MatScale_MPISBAIJ, 1830 MatShift_MPISBAIJ, 1831 0, 1832 0, 1833 /* 49*/ 0, 1834 0, 1835 0, 1836 0, 1837 0, 1838 /* 54*/ 0, 1839 0, 1840 MatSetUnfactored_MPISBAIJ, 1841 0, 1842 MatSetValuesBlocked_MPISBAIJ, 1843 /* 59*/ MatCreateSubMatrix_MPISBAIJ, 1844 0, 1845 0, 1846 0, 1847 0, 1848 /* 64*/ 0, 1849 0, 1850 0, 1851 0, 1852 0, 1853 /* 69*/ MatGetRowMaxAbs_MPISBAIJ, 1854 0, 1855 MatConvert_MPISBAIJ_Basic, 1856 0, 1857 0, 1858 /* 74*/ 0, 1859 0, 1860 0, 1861 0, 1862 0, 1863 /* 79*/ 0, 1864 0, 1865 0, 1866 0, 1867 MatLoad_MPISBAIJ, 1868 /* 84*/ 0, 1869 0, 1870 0, 1871 0, 1872 0, 1873 /* 89*/ 0, 1874 0, 1875 0, 1876 0, 1877 0, 1878 /* 94*/ 0, 1879 0, 1880 0, 1881 0, 1882 0, 1883 /* 99*/ 0, 1884 0, 1885 0, 1886 0, 1887 0, 1888 /*104*/ 0, 1889 MatRealPart_MPISBAIJ, 1890 MatImaginaryPart_MPISBAIJ, 1891 MatGetRowUpperTriangular_MPISBAIJ, 1892 
MatRestoreRowUpperTriangular_MPISBAIJ, 1893 /*109*/ 0, 1894 0, 1895 0, 1896 0, 1897 MatMissingDiagonal_MPISBAIJ, 1898 /*114*/ 0, 1899 0, 1900 0, 1901 0, 1902 0, 1903 /*119*/ 0, 1904 0, 1905 0, 1906 0, 1907 0, 1908 /*124*/ 0, 1909 0, 1910 0, 1911 0, 1912 0, 1913 /*129*/ 0, 1914 0, 1915 0, 1916 0, 1917 0, 1918 /*134*/ 0, 1919 0, 1920 0, 1921 0, 1922 0, 1923 /*139*/ MatSetBlockSizes_Default, 1924 0, 1925 0, 1926 0, 1927 0, 1928 /*144*/MatCreateMPIMatConcatenateSeqMat_MPISBAIJ 1929 }; 1930 1931 PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz) 1932 { 1933 Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)B->data; 1934 PetscErrorCode ierr; 1935 PetscInt i,mbs,Mbs; 1936 PetscMPIInt size; 1937 1938 PetscFunctionBegin; 1939 ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 1940 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 1941 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 1942 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 1943 if (B->rmap->N > B->cmap->N) SETERRQ2(PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"MPISBAIJ matrix cannot have more rows %D than columns %D",B->rmap->N,B->cmap->N); 1944 if (B->rmap->n > B->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"MPISBAIJ matrix cannot have more local rows %D than columns %D",B->rmap->n,B->cmap->n); 1945 1946 mbs = B->rmap->n/bs; 1947 Mbs = B->rmap->N/bs; 1948 if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->N,bs); 1949 1950 B->rmap->bs = bs; 1951 b->bs2 = bs*bs; 1952 b->mbs = mbs; 1953 b->Mbs = Mbs; 1954 b->nbs = B->cmap->n/bs; 1955 b->Nbs = B->cmap->N/bs; 1956 1957 for (i=0; i<=b->size; i++) { 1958 b->rangebs[i] = B->rmap->range[i]/bs; 1959 } 1960 b->rstartbs = B->rmap->rstart/bs; 1961 b->rendbs = B->rmap->rend/bs; 1962 1963 b->cstartbs = B->cmap->rstart/bs; 1964 b->cendbs = B->cmap->rend/bs; 1965 1966 #if defined(PETSC_USE_CTABLE) 1967 ierr 
= PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 1968 #else 1969 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 1970 #endif 1971 ierr = PetscFree(b->garray);CHKERRQ(ierr); 1972 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 1973 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 1974 ierr = VecDestroy(&b->slvec0);CHKERRQ(ierr); 1975 ierr = VecDestroy(&b->slvec0b);CHKERRQ(ierr); 1976 ierr = VecDestroy(&b->slvec1);CHKERRQ(ierr); 1977 ierr = VecDestroy(&b->slvec1a);CHKERRQ(ierr); 1978 ierr = VecDestroy(&b->slvec1b);CHKERRQ(ierr); 1979 ierr = VecScatterDestroy(&b->sMvctx);CHKERRQ(ierr); 1980 1981 /* Because the B will have been resized we simply destroy it and create a new one each time */ 1982 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 1983 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 1984 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 1985 ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr); 1986 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 1987 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 1988 1989 if (!B->preallocated) { 1990 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 1991 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 1992 ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr); 1993 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 1994 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr); 1995 } 1996 1997 ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 1998 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 1999 2000 B->preallocated = PETSC_TRUE; 2001 B->was_assembled = PETSC_FALSE; 2002 B->assembled = PETSC_FALSE; 2003 PetscFunctionReturn(0); 2004 } 2005 2006 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 
{
  PetscInt       m,rstart,cend;
  PetscInt       i,j,d,nz,bd, nz_max=0,*d_nnz=0,*o_nnz=0;
  const PetscInt *JJ    =0;
  PetscScalar    *values=0;
  PetscBool      roworiented = ((Mat_MPISBAIJ*)B->data)->roworiented;
  PetscErrorCode ierr;
  PetscBool      nooffprocentries;

  PetscFunctionBegin;
  if (bs < 1) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
  ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
  ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
  ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
  /* all sizes below are in numbers of blocks, not points */
  m      = B->rmap->n/bs;
  rstart = B->rmap->rstart/bs;
  cend   = B->cmap->rend/bs;

  if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
  ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    nz = ii[i+1] - ii[i];
    if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
    /* count the ones on the diagonal and above, split into diagonal and off diagonal portions.
       Assumes the column indices of each row are sorted ascending (CSR convention). */
    JJ = jj + ii[i];
    bd = 0;
    /* bd = number of entries strictly below the (block) diagonal; these are ignored for SBAIJ */
    for (j=0; j<nz; j++) {
      if (*JJ >= i + rstart) break;
      JJ++;
      bd++;
    }
    d = 0;
    /* d = entries in [diagonal, cend): the local diagonal block portion */
    for (; j<nz; j++) {
      if (*JJ++ >= cend) break;
      d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nz - d - bd;  /* remainder lies right of the diagonal block */
    nz       = nz - bd;
    nz_max   = PetscMax(nz_max,nz);
  }
  ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
  ierr = MatSetOption(B,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
  ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);

  values = (PetscScalar*)V;
  if (!values) {
    /* no values supplied: insert zeros so the nonzero pattern is still established */
    ierr = PetscCalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr);
  }
  for (i=0; i<m; i++) {
    PetscInt          row   = i + rstart;
    PetscInt          ncols = ii[i+1] - ii[i];
    const PetscInt    *icols = jj + ii[i];
    if (bs == 1 || !roworiented) {         /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
      const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
      ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
    } else {                    /* block ordering does not match so we can only insert one block at a time. */
      PetscInt j;
      for (j=0; j<ncols; j++) {
        const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
        ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
      }
    }
  }

  if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
  /* all data is already local, so skip off-process communication during assembly */
  nooffprocentries    = B->nooffprocentries;
  B->nooffprocentries = PETSC_TRUE;
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  B->nooffprocentries = nooffprocentries;

  ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*MC
   MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
   based on block compressed sparse row format.  Only the upper triangular portion of the "diagonal" portion of
   the matrix is stored.

   For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you
   can call MatSetOption(Mat, MAT_HERMITIAN);

   Options Database Keys:
. -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()

   Notes:
     The number of rows in the matrix must be less than or equal to the number of columns. Similarly the number of rows in the
     diagonal portion of the matrix of each process has to be less than or equal the number of columns.

   Level: beginner

.seealso: MatCreateBAIJ(), MATSEQSBAIJ, MatType
M*/

/*
   MatCreate_MPISBAIJ - Constructor registered for MATMPISBAIJ: installs the
   function table, zeroes all per-type state, composes the type-specific
   methods, marks the matrix symmetric, and processes the hash-table option.
*/
PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B)
{
  Mat_MPISBAIJ   *b;
  PetscErrorCode ierr;
  PetscBool      flg = PETSC_FALSE;

  PetscFunctionBegin;
  ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
  B->data = (void*)b;
  ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  B->ops->destroy = MatDestroy_MPISBAIJ;
  B->ops->view    = MatView_MPISBAIJ;
  B->assembled    = PETSC_FALSE;
  B->insertmode   = NOT_SET_VALUES;

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  ierr = PetscMalloc1(b->size+2,&b->rangebs);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used in block assembly */
  b->barray = 0;

  /* stuff used for matrix vector multiply */
  b->lvec    = 0;
  b->Mvctx   = 0;
  b->slvec0  = 0;
  b->slvec0b = 0;
  b->slvec1  = 0;
  b->slvec1a = 0;
  b->slvec1b = 0;
  b->sMvctx  = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = 0;
  b->hd           = 0;
  b->ht_size      = 0;
  b->ht_flag      = PETSC_FALSE;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
  b->ijonly = PETSC_FALSE;

  b->in_loc = 0;
  b->v_loc  = 0;
  b->n_loc  = 0;

  /* type-specific methods dispatched by name */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_elemental_C",MatConvert_MPISBAIJ_Elemental);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_scalapack_C",MatConvert_SBAIJ_ScaLAPACK);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpiaij_C",MatConvert_MPISBAIJ_Basic);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpibaij_C",MatConvert_MPISBAIJ_Basic);CHKERRQ(ierr);

  /* SBAIJ is symmetric by construction; Hermitian only for real scalars */
  B->symmetric                  = PETSC_TRUE;
  B->structurally_symmetric     = PETSC_TRUE;
  B->symmetric_set              = PETSC_TRUE;
  B->structurally_symmetric_set = PETSC_TRUE;
  B->symmetric_eternal          = PETSC_TRUE;
#if defined(PETSC_USE_COMPLEX)
  B->hermitian     = PETSC_FALSE;
  B->hermitian_set = PETSC_FALSE;
#else
  B->hermitian     = PETSC_TRUE;
  B->hermitian_set = PETSC_TRUE;
#endif

  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr);
  ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",flg,&flg,NULL);CHKERRQ(ierr);
  if (flg) {
    PetscReal fact = 1.39;
    ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
    ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
    if (fact <= 1.0) fact = 1.39;
    ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
    ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
  }
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*MC
   MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.

   This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
   and MATMPISBAIJ otherwise.

   Options Database Keys:
. -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPISBAIJ, MATSEQSBAIJ, MATMPISBAIJ
M*/

/*@C
   MatMPISBAIJSetPreallocation - For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on Mat

   Input Parameters:
+  B - the matrix
.  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
          blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
.  d_nz  - number of block nonzeros per block row in diagonal portion of local
           submatrix  (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the upper triangular and diagonal part of the in diagonal portion of the local
           (possibly different for each block row) or NULL.
           If you plan to factor the matrix you must leave room
           for the diagonal entry and set a value even if it is zero.
.  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
           submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix that is right of the diagonal
           (possibly different for each block row) or NULL.


   Options Database Keys:
+   -mat_no_unroll - uses code that does not unroll the loops in the
                     block calculations (much slower)
-   -mat_block_size - size of the blocks to use

   Notes:

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   If the *_nnz parameter is given then the *_nz parameter is ignored

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   You can call MatGetInfo() to get information on how effective the preallocation was;
   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          --------------------------
   row 3  |. . . d d d o o o o  o  o
   row 4  |. . . d d d o o o o  o  o
   row 5  |. . . d d d o o o o  o  o
          --------------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d matrix is stored in
   MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.

   Now d_nz should indicate the number of block nonzeros per row in the upper triangular
   plus the diagonal part of the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix

   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership()
@*/
/* Thin collective wrapper: dispatches to the type-specific implementation (if any) by name. */
PetscErrorCode  MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  PetscValidLogicalCollectiveInt(B,bs,2);
  ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
   (block compressed row).
   For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
          blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
           This value should be the same as the local size used in creating the
           y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
           This value should be the same as the local size used in creating the
           x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz  - number of block nonzeros per block row in diagonal portion of local
           submatrix  (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the upper triangular portion of the diagonal portion of the local submatrix
           (possibly different for each block row) or NULL.
           If you plan to factor the matrix you must leave room for the diagonal entry and
           set its value even if it is zero.
.  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
           submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or NULL.

   Output Parameter:
.  A - the matrix

   Options Database Keys:
+   -mat_no_unroll - uses code that does not unroll the loops in the
                     block calculations (much slower)
.   -mat_block_size - size of the blocks to use
-   -mat_mpi - use the parallel matrix data structures even on one processor
               (defaults to using SeqBAIJ format on one processor)

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]

   Notes:
   The number of rows and columns must be divisible by blocksize.
   This matrix type does not support complex Hermitian operation.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   If the *_nnz parameter is given then the *_nz parameter is ignored

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          --------------------------
   row 3  |. . . d d d o o o o  o  o
   row 4  |. . . d d d o o o o  o  o
   row 5  |. . . d d d o o o o  o  o
          --------------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d matrix is stored in
   MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.

   Now d_nz should indicate the number of block nonzeros per row in the upper triangular
   plus the diagonal part of the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ()
@*/
/* Convenience constructor: creates, types and preallocates the matrix in one call. */
PetscErrorCode  MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    /* one process: the sequential format is used directly */
    ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
    ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}


/*
   MatDuplicate_MPISBAIJ - Duplicates matin into *newmat, optionally copying
   values (cpvalues).  Scalar fields are copied, the colmap/garray lookup
   structures are deep-copied, the multiply work vectors are duplicated (with
   the slvec1a/slvec1b and slvec0b sub-vectors re-aliased onto the new
   arrays), and the A (diagonal) and B (off-diagonal) blocks are duplicated.
   The sMvctx scatter is shared by reference rather than copied (see comment
   below).
*/
static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
  PetscErrorCode ierr;
  PetscInt       len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs;
  PetscScalar    *array;

  PetscFunctionBegin;
  *newmat = 0;

  ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
  ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);

  mat->factortype   = matin->factortype;
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;

  a      = (Mat_MPISBAIJ*)mat->data;
  a->bs2 = oldmat->bs2;
  a->mbs = oldmat->mbs;
  a->nbs = oldmat->nbs;
  a->Mbs = oldmat->Mbs;
  a->Nbs = oldmat->Nbs;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;
  a->barray       = 0;
  a->rstartbs     = oldmat->rstartbs;
  a->rendbs       = oldmat->rendbs;
  a->cstartbs     = oldmat->cstartbs;
  a->cendbs       = oldmat->cendbs;

  /* hash table stuff: counters start fresh, only the configuration is copied */
  a->ht           = 0;
  a->hd           = 0;
  a->ht_size      = 0;
  a->ht_flag      = oldmat->ht_flag;
  a->ht_fact      = oldmat->ht_fact;
  a->ht_total_ct  = 0;
  a->ht_insert_ct = 0;

  ierr = PetscArraycpy(a->rangebs,oldmat->rangebs,a->size+2);CHKERRQ(ierr);
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscArraycpy(a->colmap,oldmat->colmap,a->Nbs);CHKERRQ(ierr);
#endif
  } else a->colmap = 0;

  if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
    ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr);
  } else a->garray = 0;

  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);

  ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
  ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);

  /* slvec1a/slvec1b (and slvec0b) alias the leading/trailing pieces of the new
     slvec1 (slvec0) arrays, mirroring the layout created at assembly time */
  ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
  ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
  ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b);CHKERRQ(ierr);

  /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
  ierr      = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
  a->sMvctx = oldmat->sMvctx;
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx);CHKERRQ(ierr);

  ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
  ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}

/* Used for both
MPIBAIJ and MPISBAIJ matrices */
#define MatLoad_MPISBAIJ_Binary MatLoad_MPIBAIJ_Binary

/* Load an MPISBAIJ matrix from a viewer.  Only binary viewers are supported;
   the actual reading is delegated to the shared MPIBAIJ binary loader via the
   #define above. */
PetscErrorCode MatLoad_MPISBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      isbinary;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)mat)->type_name);
  ierr = MatLoad_MPISBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*XXXXX@
   MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.

   Input Parameters:
.  mat  - the matrix
.  fact - factor

   Not Collective on Mat, each process can have a different hash factor

   Level: advanced

   Notes:
   This can also be set by the command line option: -mat_use_hash_table fact

.seealso: MatSetOption()
@XXXXX*/


/* Compute, for each local row of A, the maximum absolute value in that row
   (returned in v).  Since only the upper triangle of the symmetric matrix is
   stored, contributions from the implicit lower triangle are accumulated in a
   global-sized work array and communicated between processes with explicit
   MPI sends/receives below.  The idx argument (index of each row maximum) is
   not supported and must be NULL. */
PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;       /* off-diagonal (upper) part of A */
  PetscReal      atmp;
  PetscReal      *work,*svalues,*rvalues;
  PetscErrorCode ierr;
  PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
  PetscMPIInt    rank,size;
  PetscInt       *rowners_bs,dest,count,source;
  PetscScalar    *va;
  MatScalar      *ba;
  MPI_Status     stat;

  PetscFunctionBegin;
  if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
  /* row maxima of the local diagonal block first; off-diagonal contributions are merged in below */
  ierr = MatGetRowMaxAbs(a->A,v,NULL);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);

  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);

  bs = A->rmap->bs;
  mbs = a->mbs;
  Mbs = a->Mbs;
  ba  = b->a;                   /* values of the stored upper off-diagonal part */
  bi  = b->i;                   /* block-row pointers */
  bj  = b->j;                   /* block-column indices */

  /* find ownerships */
  rowners_bs = A->rmap->range;

  /* each proc creates an array to be distributed */
  ierr = PetscCalloc1(bs*Mbs,&work);CHKERRQ(ierr);

  /* row_max for B: walk the stored upper off-diagonal entries; each entry (row,col)
     also represents the transposed entry (col,row) owned by a later process, whose
     contribution is accumulated in work[] and sent onward.  The last rank owns no
     stored off-diagonal part, hence the size-1 exclusion. */
  if (rank != size-1) {
    for (i=0; i<mbs; i++) {
      ncols = bi[1] - bi[0]; bi++;
      brow  = bs*i;
      for (j=0; j<ncols; j++) {
        bcol = bs*(*bj);
        for (kcol=0; kcol<bs; kcol++) {
          col  = bcol + kcol;        /* local col index */
          col += rowners_bs[rank+1]; /* global col index; NOTE(review): assumes B's columns are numbered relative to the start of the next rank's range — confirm against the SBAIJ assembly */
          for (krow=0; krow<bs; krow++) {
            atmp = PetscAbsScalar(*ba); ba++;
            row  = brow + krow;      /* local row index */
            if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
            if (work[col] < atmp) work[col] = atmp;
          }
        }
        bj++;
      }
    }

    /* send accumulated lower-triangle contributions to the owning (higher-rank) processes */
    for (dest=rank+1; dest<size; dest++) {
      svalues = work + rowners_bs[dest];
      count   = rowners_bs[dest+1]-rowners_bs[dest];
      ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
    }
  }

  /* receive one message from every lower rank and fold it into the local maxima */
  if (rank) {
    rvalues = work;
    count   = rowners_bs[rank+1]-rowners_bs[rank];
    for (source=0; source<rank; source++) {
      ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat);CHKERRQ(ierr);
      /* process values */
      for (i=0; i<count; i++) {
        if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
      }
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = PetscFree(work);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* SOR/SSOR relaxation for MPISBAIJ matrices; only block size 1 is implemented.
   Off-process coupling is handled through the slvec0/slvec1 work vectors and
   the sMvctx scatter (see the MPISBAIJ mat-vec machinery). */
PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
  PetscErrorCode ierr;
  PetscInt       mbs=mat->mbs,bs=matin->rmap->bs;
  PetscScalar       *x,*ptr,*from;
  Vec               bb1;
  const PetscScalar *b;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
  if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");

  /* SOR_APPLY_UPPER: apply the local (diagonal-block) operator only */
  if (flag == SOR_APPLY_UPPER) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    /* with a zero initial guess the first sweep needs no off-process correction */
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
    while (its--) {

      /* lower triangular part: slvec0b = - B^T*xx */
      ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);

      /* copy xx into slvec0a */
      ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
      ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
      ierr = PetscArraycpy(ptr,x,bs*mbs);CHKERRQ(ierr);
      ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);

      ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);

      /* copy bb into slvec1a */
      ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
      ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = PetscArraycpy(ptr,b,bs*mbs);CHKERRQ(ierr);
      ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);

      /* set slvec1b = 0 */
      ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);

      /* scatter slvec0 into slvec1 with addition; the restores are interleaved
         to overlap with the communication */
      ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
      ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* upper triangular part: bb1 = slvec1a + B*slvec1b */
      ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);

      /* local diagonal sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
    ierr = VecDestroy(&bb1);CHKERRQ(ierr);
  } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
  } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
  } else if (flag & SOR_EISENSTAT) {
    /* Eisenstat's trick: one backward local sweep, a diagonal correction, one
       off-diagonal mat-vec, and one forward local sweep */
    Vec               xx1;
    PetscBool         hasop;
    const PetscScalar *diag;
    PetscScalar       *sl,scale = (omega - 2.0)/omega;
    PetscInt          i,n;

    /* lazily created work vectors, cached on the matrix */
    if (!mat->xx1) {
      ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr);
      ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr);
    }
    xx1 = mat->xx1;
    bb1 = mat->bb1;

    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);

    if (!mat->diag) {
      /* this is wrong for same matrix with new nonzero values */
      ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
      ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
    }
    ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);

    if (hasop) {
      ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
      ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
    } else {
      /*
        These two lines are replaced by code that may be a bit faster for a good compiler
        ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr);
        ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
      */
      ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr);
      ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr);
      ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
      ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr);
      if (omega == 1.0) {
        /* scale == -1 here, so sl = b - D x */
        for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i];
        ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr);
      } else {
        for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i];
        ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr);
      }
      ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr);
      ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr);
      ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
    }

    /* multiply off-diagonal portion of matrix */
    ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
    ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
    ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr);
    ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
    ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr);
    ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr);
    ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
    ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr);

    /* local sweep */
    ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
    ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
  PetscFunctionReturn(0);
}

/*@
   MatCreateMPISBAIJWithArrays - creates a MPI SBAIJ matrix using arrays that contain in standard
   CSR format the local rows.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  bs - the block size, only a block size of 1 is supported
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that row block row of the matrix
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
     The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
   thus you CANNOT change the matrix entries by changing the values of a[] after you have
   called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

     The i and j indices are 0 based, and i indices are indices corresponding to the local j array.

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* validate the CSR input before creating anything */
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr);
  /* the CSR data is copied; the caller's arrays remain owned by the caller */
  ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}


/*@C
   MatMPISBAIJSetPreallocationCSR - Creates a sparse parallel matrix in SBAIJ format using the given nonzero structure and (optional) numerical values

   Collective

   Input Parameters:
+  B - the matrix
.  bs - the block size
.  i - the indices into j for the start of each local row (starts with zero)
.
j - the column indices for each local row (starts with zero) these must be sorted for each row 2856 - v - optional values in the matrix 2857 2858 Level: advanced 2859 2860 Notes: 2861 Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries 2862 and usually the numerical values as well 2863 2864 Any entries below the diagonal are ignored 2865 2866 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ 2867 @*/ 2868 PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2869 { 2870 PetscErrorCode ierr; 2871 2872 PetscFunctionBegin; 2873 ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 2874 PetscFunctionReturn(0); 2875 } 2876 2877 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 2878 { 2879 PetscErrorCode ierr; 2880 PetscInt m,N,i,rstart,nnz,Ii,bs,cbs; 2881 PetscInt *indx; 2882 PetscScalar *values; 2883 2884 PetscFunctionBegin; 2885 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 2886 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 2887 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inmat->data; 2888 PetscInt *dnz,*onz,mbs,Nbs,nbs; 2889 PetscInt *bindx,rmax=a->rmax,j; 2890 PetscMPIInt rank,size; 2891 2892 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 2893 mbs = m/bs; Nbs = N/cbs; 2894 if (n == PETSC_DECIDE) { 2895 ierr = PetscSplitOwnershipBlock(comm,cbs,&n,&N); 2896 } 2897 nbs = n/cbs; 2898 2899 ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr); 2900 ierr = MatPreallocateInitialize(comm,mbs,nbs,dnz,onz);CHKERRQ(ierr); /* inline function, output __end and __rstart are used below */ 2901 2902 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2903 ierr = MPI_Comm_rank(comm,&size);CHKERRQ(ierr); 2904 if (rank == 
size-1) { 2905 /* Check sum(nbs) = Nbs */ 2906 if (__end != Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local block columns %D != global block columns %D",__end,Nbs); 2907 } 2908 2909 rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateInitialize */ 2910 ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 2911 for (i=0; i<mbs; i++) { 2912 ierr = MatGetRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */ 2913 nnz = nnz/bs; 2914 for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs; 2915 ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr); 2916 ierr = MatRestoreRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); 2917 } 2918 ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr); 2919 ierr = PetscFree(bindx);CHKERRQ(ierr); 2920 2921 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 2922 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2923 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 2924 ierr = MatSetType(*outmat,MATSBAIJ);CHKERRQ(ierr); 2925 ierr = MatSeqSBAIJSetPreallocation(*outmat,bs,0,dnz);CHKERRQ(ierr); 2926 ierr = MatMPISBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr); 2927 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2928 } 2929 2930 /* numeric phase */ 2931 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 2932 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 2933 2934 ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 2935 for (i=0; i<m; i++) { 2936 ierr = MatGetRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2937 Ii = i + rstart; 2938 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2939 ierr = MatRestoreRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2940 } 2941 ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr); 2942 ierr = 
MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2943 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2944 PetscFunctionReturn(0); 2945 } 2946