1 2 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/ 3 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 4 #include <../src/mat/impls/sbaij/seq/sbaij.h> 5 #include <petscblaslapack.h> 6 7 #if defined(PETSC_HAVE_ELEMENTAL) 8 PETSC_EXTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 9 #endif 10 #undef __FUNCT__ 11 #define __FUNCT__ "MatStoreValues_MPISBAIJ" 12 PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat) 13 { 14 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ*)mat->data; 15 PetscErrorCode ierr; 16 17 PetscFunctionBegin; 18 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 19 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 20 PetscFunctionReturn(0); 21 } 22 23 #undef __FUNCT__ 24 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ" 25 PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat) 26 { 27 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ*)mat->data; 28 PetscErrorCode ierr; 29 30 PetscFunctionBegin; 31 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 32 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 33 PetscFunctionReturn(0); 34 } 35 36 #define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \ 37 { \ 38 \ 39 brow = row/bs; \ 40 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 41 rmax = aimax[brow]; nrow = ailen[brow]; \ 42 bcol = col/bs; \ 43 ridx = row % bs; cidx = col % bs; \ 44 low = 0; high = nrow; \ 45 while (high-low > 3) { \ 46 t = (low+high)/2; \ 47 if (rp[t] > bcol) high = t; \ 48 else low = t; \ 49 } \ 50 for (_i=low; _i<high; _i++) { \ 51 if (rp[_i] > bcol) break; \ 52 if (rp[_i] == bcol) { \ 53 bap = ap + bs2*_i + bs*cidx + ridx; \ 54 if (addv == ADD_VALUES) *bap += value; \ 55 else *bap = value; \ 56 goto a_noinsert; \ 57 } \ 58 } \ 59 if (a->nonew == 1) goto a_noinsert; \ 60 if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 61 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 62 N = nrow++ - 1; \ 63 /* shift up all the later entries in this row */ \ 64 for (ii=N; ii>=_i; ii--) { \ 65 rp[ii+1] = rp[ii]; \ 66 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 67 } \ 68 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 69 rp[_i] = bcol; \ 70 ap[bs2*_i + bs*cidx + ridx] = value; \ 71 A->nonzerostate++;\ 72 a_noinsert:; \ 73 ailen[brow] = nrow; \ 74 } 75 76 #define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \ 77 { \ 78 brow = row/bs; \ 79 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 80 rmax = bimax[brow]; nrow = bilen[brow]; \ 81 bcol = col/bs; \ 82 ridx = row % bs; cidx = col % bs; \ 83 low = 0; high = nrow; \ 84 while (high-low > 3) { \ 85 t = (low+high)/2; \ 86 if (rp[t] > bcol) high = t; \ 87 else low = t; \ 88 } \ 89 for (_i=low; _i<high; _i++) { \ 90 if (rp[_i] > bcol) break; \ 91 if (rp[_i] == bcol) { \ 92 bap = ap + bs2*_i + bs*cidx + ridx; \ 93 if (addv == ADD_VALUES) *bap += value; \ 94 else *bap = value; \ 95 goto b_noinsert; \ 96 } \ 97 } \ 98 if (b->nonew == 1) goto b_noinsert; \ 99 if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 100 MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 101 N = nrow++ - 1; \ 102 /* shift up all the later entries in this row */ \ 103 for (ii=N; ii>=_i; ii--) { \ 104 rp[ii+1] = rp[ii]; \ 105 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 106 } \ 107 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 108 rp[_i] = bcol; \ 109 ap[bs2*_i + bs*cidx + ridx] = value; \ 110 B->nonzerostate++;\ 111 b_noinsert:; \ 112 bilen[brow] = nrow; \ 113 } 114 115 /* Only add/insert a(i,j) with i<=j (blocks). 116 Any a(i,j) with i>j input by user is ingored. 117 */ 118 #undef __FUNCT__ 119 #define __FUNCT__ "MatSetValues_MPISBAIJ" 120 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 121 { 122 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 123 MatScalar value; 124 PetscBool roworiented = baij->roworiented; 125 PetscErrorCode ierr; 126 PetscInt i,j,row,col; 127 PetscInt rstart_orig=mat->rmap->rstart; 128 PetscInt rend_orig =mat->rmap->rend,cstart_orig=mat->cmap->rstart; 129 PetscInt cend_orig =mat->cmap->rend,bs=mat->rmap->bs; 130 131 /* Some Variables required in the macro */ 132 Mat A = baij->A; 133 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data; 134 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 135 MatScalar *aa =a->a; 136 137 Mat B = baij->B; 138 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 139 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 140 MatScalar *ba =b->a; 141 142 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 143 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 144 MatScalar *ap,*bap; 145 146 /* for stash */ 147 PetscInt n_loc, *in_loc = NULL; 148 MatScalar *v_loc = NULL; 149 150 PetscFunctionBegin; 151 if (!baij->donotstash) { 152 if (n > baij->n_loc) { 153 ierr = PetscFree(baij->in_loc);CHKERRQ(ierr); 154 ierr = PetscFree(baij->v_loc);CHKERRQ(ierr); 155 ierr = PetscMalloc1(n,&baij->in_loc);CHKERRQ(ierr); 156 ierr = PetscMalloc1(n,&baij->v_loc);CHKERRQ(ierr); 157 158 baij->n_loc = n; 159 } 160 in_loc = baij->in_loc; 161 v_loc = baij->v_loc; 162 } 163 164 for (i=0; i<m; i++) { 165 if (im[i] < 0) continue; 166 #if defined(PETSC_USE_DEBUG) 167 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 168 #endif 169 if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */ 170 row = im[i] - rstart_orig; /* local row index */ 171 for (j=0; j<n; j++) { 172 if (im[i]/bs > in[j]/bs) { 173 if (a->ignore_ltriangular) { 174 continue; /* ignore lower triangular blocks */ 175 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 176 } 177 if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */ 178 col = in[j] - cstart_orig; /* local col index */ 179 brow = row/bs; bcol = col/bs; 180 if (brow > bcol) continue; /* ignore lower triangular blocks of A */ 181 if (roworiented) value = v[i*n+j]; 182 else value = v[i+j*m]; 183 MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv); 184 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 185 } else if (in[j] < 0) continue; 186 #if defined(PETSC_USE_DEBUG) 187 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 188 #endif 189 else { /* off-diag entry (B) */ 190 if (mat->was_assembled) { 191 if (!baij->colmap) { 192 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 193 } 194 #if defined(PETSC_USE_CTABLE) 195 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 196 col = col - 1; 197 #else 198 col = baij->colmap[in[j]/bs] - 1; 199 #endif 200 if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) { 201 ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 202 col = in[j]; 203 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 204 B = baij->B; 205 b = (Mat_SeqBAIJ*)(B)->data; 206 bimax= b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 207 ba = b->a; 208 } else col += in[j]%bs; 209 } else col = in[j]; 210 if (roworiented) value = v[i*n+j]; 211 else value = v[i+j*m]; 212 MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv); 213 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 214 } 215 } 216 } else { /* off processor entry */ 217 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 218 if (!baij->donotstash) { 219 mat->assembled = PETSC_FALSE; 220 n_loc = 0; 221 for (j=0; j<n; j++) { 222 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 223 in_loc[n_loc] = in[j]; 224 if (roworiented) { 225 v_loc[n_loc] = v[i*n+j]; 226 } else { 227 v_loc[n_loc] = v[j*m+i]; 228 } 229 n_loc++; 230 } 231 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr); 232 } 233 } 234 } 235 PetscFunctionReturn(0); 236 } 237 238 #undef __FUNCT__ 239 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ" 240 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 241 { 242 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 243 const MatScalar *value; 244 MatScalar *barray =baij->barray; 245 PetscBool roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular; 246 PetscErrorCode ierr; 247 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 248 PetscInt rend=baij->rendbs,cstart=baij->rstartbs,stepval; 249 PetscInt cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2; 250 251 PetscFunctionBegin; 252 if (!barray) { 253 ierr = PetscMalloc1(bs2,&barray);CHKERRQ(ierr); 254 baij->barray = barray; 255 } 256 257 if (roworiented) { 258 stepval = (n-1)*bs; 259 } else { 260 stepval = (m-1)*bs; 261 } 262 for (i=0; i<m; i++) { 263 if (im[i] < 0) continue; 264 #if defined(PETSC_USE_DEBUG) 265 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 266 #endif 267 if (im[i] >= rstart && im[i] < rend) { 268 row = im[i] - rstart; 269 for (j=0; j<n; j++) { 270 if (im[i] > in[j]) { 271 if (ignore_ltriangular) continue; /* ignore lower triangular blocks */ 272 else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 273 } 274 /* If NumCol = 1 then a copy is not required */ 275 if ((roworiented) && (n == 1)) { 276 barray = (MatScalar*) v + i*bs2; 277 } else if ((!roworiented) && (m == 1)) { 278 barray = (MatScalar*) v + j*bs2; 279 } else { /* Here a copy is required */ 280 if (roworiented) { 281 value = v + i*(stepval+bs)*bs + j*bs; 282 } else { 283 value = v + j*(stepval+bs)*bs + i*bs; 284 } 285 for (ii=0; ii<bs; ii++,value+=stepval) { 286 for (jj=0; jj<bs; jj++) { 287 *barray++ = *value++; 288 } 289 } 290 barray -=bs2; 291 } 292 293 if (in[j] >= cstart && in[j] < cend) { 294 col = in[j] - cstart; 295 ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 296 } else if (in[j] < 0) continue; 297 #if defined(PETSC_USE_DEBUG) 298 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 299 #endif 300 else { 301 if (mat->was_assembled) { 302 if (!baij->colmap) { 303 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 304 } 305 306 #if defined(PETSC_USE_DEBUG) 307 #if defined(PETSC_USE_CTABLE) 308 { PetscInt data; 309 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 310 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 311 } 312 #else 313 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 314 #endif 315 #endif 316 #if defined(PETSC_USE_CTABLE) 317 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 318 col = (col - 1)/bs; 319 #else 320 col = (baij->colmap[in[j]] - 1)/bs; 321 #endif 322 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 323 ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 324 col = in[j]; 325 } 326 } else col = in[j]; 327 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 328 } 329 } 330 } else { 331 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 332 if (!baij->donotstash) { 333 if (roworiented) { 334 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 335 } else { 336 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 337 } 338 } 339 } 340 } 341 PetscFunctionReturn(0); 342 } 343 344 #undef __FUNCT__ 345 #define __FUNCT__ "MatGetValues_MPISBAIJ" 346 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 347 { 348 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 349 PetscErrorCode ierr; 350 PetscInt bs = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend; 351 PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data; 352 353 PetscFunctionBegin; 354 for (i=0; i<m; i++) { 355 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */ 356 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 357 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 358 row = idxm[i] - bsrstart; 359 for (j=0; j<n; j++) { 360 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */ 361 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 362 if (idxn[j] >= bscstart && idxn[j] < bscend) { 363 col = idxn[j] - bscstart; 364 ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 365 } else { 366 if (!baij->colmap) { 367 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 368 } 369 #if defined(PETSC_USE_CTABLE) 370 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 371 data--; 372 #else 373 data = baij->colmap[idxn[j]/bs]-1; 374 #endif 375 if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 376 else { 377 col = data + idxn[j]%bs; 378 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 379 } 380 } 381 } 382 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 383 } 384 PetscFunctionReturn(0); 385 } 386 387 #undef __FUNCT__ 388 #define __FUNCT__ "MatNorm_MPISBAIJ" 389 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm) 390 { 391 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 392 PetscErrorCode ierr; 393 PetscReal sum[2],*lnorm2; 394 395 PetscFunctionBegin; 396 if (baij->size == 1) { 397 ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr); 398 } else { 399 if (type == NORM_FROBENIUS) { 400 ierr = PetscMalloc1(2,&lnorm2);CHKERRQ(ierr); 401 ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr); 402 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++; /* squar power of norm(A) */ 403 ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr); 404 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--; /* squar power of norm(B) */ 405 ierr = MPI_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 406 *norm = PetscSqrtReal(sum[0] + 2*sum[1]); 407 ierr = PetscFree(lnorm2);CHKERRQ(ierr); 408 } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */ 409 Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data; 410 Mat_SeqBAIJ *bmat=(Mat_SeqBAIJ*)baij->B->data; 411 PetscReal *rsum,*rsum2,vabs; 412 PetscInt *jj,*garray=baij->garray,rstart=baij->rstartbs,nz; 413 PetscInt brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs; 414 MatScalar *v; 415 416 ierr = PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2);CHKERRQ(ierr); 417 ierr = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 418 /* Amat */ 419 v = amat->a; jj = amat->j; 420 for (brow=0; brow<mbs; brow++) { 421 grow = bs*(rstart + brow); 422 nz = amat->i[brow+1] - amat->i[brow]; 423 for (bcol=0; bcol<nz; bcol++) { 424 gcol = bs*(rstart + *jj); jj++; 425 for (col=0; col<bs; col++) { 426 for (row=0; row<bs; row++) { 427 vabs = PetscAbsScalar(*v); v++; 428 rsum[gcol+col] += vabs; 429 /* non-diagonal block */ 430 if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs; 431 } 432 } 433 } 434 } 435 /* Bmat */ 436 v = bmat->a; jj = bmat->j; 437 for (brow=0; brow<mbs; brow++) { 438 grow = bs*(rstart + brow); 439 nz = bmat->i[brow+1] - bmat->i[brow]; 440 for (bcol=0; bcol<nz; bcol++) { 441 gcol = bs*garray[*jj]; jj++; 442 for (col=0; col<bs; col++) { 443 for (row=0; row<bs; row++) { 444 vabs = PetscAbsScalar(*v); v++; 445 rsum[gcol+col] += vabs; 446 rsum[grow+row] += vabs; 447 } 448 } 449 } 450 } 451 ierr = MPI_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 452 *norm = 0.0; 453 for (col=0; col<mat->cmap->N; col++) { 454 if (rsum2[col] > *norm) *norm = rsum2[col]; 455 } 456 ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr); 457 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet"); 458 } 459 PetscFunctionReturn(0); 460 } 461 462 #undef __FUNCT__ 463 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ" 464 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode) 465 { 466 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 467 PetscErrorCode ierr; 468 PetscInt nstash,reallocs; 469 InsertMode addv; 470 471 PetscFunctionBegin; 472 if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 473 474 /* make sure all processors are either in INSERTMODE or ADDMODE */ 475 ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 476 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 477 mat->insertmode = addv; /* in case this processor had no cache */ 478 479 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 480 ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr); 481 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 482 ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 483 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 484 ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 485 PetscFunctionReturn(0); 486 } 487 488 #undef __FUNCT__ 489 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ" 490 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode) 491 { 492 Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data; 493 Mat_SeqSBAIJ *a =(Mat_SeqSBAIJ*)baij->A->data; 494 PetscErrorCode ierr; 495 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 496 PetscInt *row,*col; 497 PetscBool other_disassembled; 498 PetscMPIInt n; 499 PetscBool r1,r2,r3; 500 MatScalar *val; 501 InsertMode addv = mat->insertmode; 502 503 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 504 PetscFunctionBegin; 505 if (!baij->donotstash && !mat->nooffprocentries) { 506 while (1) { 507 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 508 if (!flg) break; 509 510 for (i=0; i<n;) { 511 /* Now identify the consecutive vals belonging to the same row */ 512 for (j=i,rstart=row[j]; j<n; j++) { 513 if (row[j] != rstart) break; 514 } 515 if (j < n) ncols = j-i; 516 else ncols = n-i; 517 /* Now assemble all these values with a single function call */ 518 ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 519 i = j; 520 } 521 } 522 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 523 /* Now process the block-stash. Since the values are stashed column-oriented, 524 set the roworiented flag to column oriented, and after MatSetValues() 525 restore the original flags */ 526 r1 = baij->roworiented; 527 r2 = a->roworiented; 528 r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented; 529 530 baij->roworiented = PETSC_FALSE; 531 a->roworiented = PETSC_FALSE; 532 533 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworinted */ 534 while (1) { 535 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 536 if (!flg) break; 537 538 for (i=0; i<n;) { 539 /* Now identify the consecutive vals belonging to the same row */ 540 for (j=i,rstart=row[j]; j<n; j++) { 541 if (row[j] != rstart) break; 542 } 543 if (j < n) ncols = j-i; 544 else ncols = n-i; 545 ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 546 i = j; 547 } 548 } 549 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 550 551 baij->roworiented = r1; 552 a->roworiented = r2; 553 554 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworinted */ 555 } 556 557 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 558 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 559 560 /* determine if any processor has disassembled, if so we must 561 also disassemble ourselfs, in order that we may reassemble. */ 562 /* 563 if nonzero structure of submatrix B cannot change then we know that 564 no processor disassembled thus we can skip this stuff 565 */ 566 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 567 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 568 if (mat->was_assembled && !other_disassembled) { 569 ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 570 } 571 } 572 573 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 574 ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */ 575 } 576 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 577 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 578 579 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 580 581 baij->rowvalues = 0; 582 583 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 584 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 585 PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate; 586 ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 587 } 588 PetscFunctionReturn(0); 589 } 590 591 extern PetscErrorCode MatView_SeqSBAIJ_ASCII(Mat,PetscViewer); 592 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 593 #include <petscdraw.h> 594 #undef __FUNCT__ 595 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket" 596 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 597 { 598 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 599 PetscErrorCode ierr; 600 PetscInt bs = mat->rmap->bs; 601 PetscMPIInt rank = baij->rank; 602 PetscBool iascii,isdraw; 603 PetscViewer sviewer; 604 PetscViewerFormat format; 605 606 PetscFunctionBegin; 607 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 608 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 609 if (iascii) { 610 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 611 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 612 MatInfo info; 613 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 614 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 615 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 616 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr); 617 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 618 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 619 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 620 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 621 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 622 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 623 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 624 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 625 PetscFunctionReturn(0); 626 } else if (format == PETSC_VIEWER_ASCII_INFO) { 627 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 628 PetscFunctionReturn(0); 629 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 630 PetscFunctionReturn(0); 631 } 632 } 633 634 if (isdraw) { 635 PetscDraw draw; 636 PetscBool isnull; 637 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 638 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 639 } 640 641 { 642 /* assemble the entire matrix onto first processor. */ 643 Mat A; 644 Mat_SeqSBAIJ *Aloc; 645 Mat_SeqBAIJ *Bloc; 646 PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 647 MatScalar *a; 648 const char *matname; 649 650 /* Should this be the same type as mat? */ 651 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 652 if (!rank) { 653 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 654 } else { 655 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 656 } 657 ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 658 ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr); 659 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 660 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 661 662 /* copy over the A part */ 663 Aloc = (Mat_SeqSBAIJ*)baij->A->data; 664 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 665 ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr); 666 667 for (i=0; i<mbs; i++) { 668 rvals[0] = bs*(baij->rstartbs + i); 669 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 670 for (j=ai[i]; j<ai[i+1]; j++) { 671 col = (baij->cstartbs+aj[j])*bs; 672 for (k=0; k<bs; k++) { 673 ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 674 col++; 675 a += bs; 676 } 677 } 678 } 679 /* copy over the B part */ 680 Bloc = (Mat_SeqBAIJ*)baij->B->data; 681 ai = Bloc->i; aj = Bloc->j; a = Bloc->a; 682 for (i=0; i<mbs; i++) { 683 684 rvals[0] = bs*(baij->rstartbs + i); 685 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 686 for (j=ai[i]; j<ai[i+1]; j++) { 687 col = baij->garray[aj[j]]*bs; 688 for (k=0; k<bs; k++) { 689 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 690 col++; 691 a += bs; 692 } 693 } 694 } 695 ierr = PetscFree(rvals);CHKERRQ(ierr); 696 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 697 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 698 /* 699 Everyone has to call to draw the matrix since the graphics waits are 700 synchronized across all processors that share the PetscDraw object 701 */ 702 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 703 ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr); 704 if (!rank) { 705 ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,matname);CHKERRQ(ierr); 706 ierr = MatView_SeqSBAIJ_ASCII(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 707 } 708 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 709 ierr = MatDestroy(&A);CHKERRQ(ierr); 710 } 711 PetscFunctionReturn(0); 712 } 713 714 #undef __FUNCT__ 715 #define __FUNCT__ "MatView_MPISBAIJ" 716 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer) 717 { 718 PetscErrorCode ierr; 719 PetscBool iascii,isdraw,issocket,isbinary; 720 721 PetscFunctionBegin; 722 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 723 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 724 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 725 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 726 if (iascii || isdraw || issocket || isbinary) { 727 ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 728 } 729 PetscFunctionReturn(0); 730 } 731 732 #undef __FUNCT__ 733 #define __FUNCT__ "MatDestroy_MPISBAIJ" 734 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat) 735 { 736 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 737 PetscErrorCode ierr; 738 739 PetscFunctionBegin; 740 #if defined(PETSC_USE_LOG) 741 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N); 742 #endif 743 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 744 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 745 ierr = MatDestroy(&baij->A);CHKERRQ(ierr); 746 ierr = MatDestroy(&baij->B);CHKERRQ(ierr); 747 #if defined(PETSC_USE_CTABLE) 748 ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr); 749 #else 750 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 751 #endif 752 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 753 ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr); 754 ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr); 755 ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr); 756 ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr); 757 ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr); 758 ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr); 759 ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr); 760 ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr); 761 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 762 ierr = PetscFree(baij->barray);CHKERRQ(ierr); 763 ierr = PetscFree(baij->hd);CHKERRQ(ierr); 764 ierr = VecDestroy(&baij->diag);CHKERRQ(ierr); 765 ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr); 766 ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr); 767 #if defined(PETSC_USE_REAL_MAT_SINGLE) 768 ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr); 769 #endif 770 ierr = PetscFree(baij->in_loc);CHKERRQ(ierr); 771 ierr = PetscFree(baij->v_loc);CHKERRQ(ierr); 772 ierr = PetscFree(baij->rangebs);CHKERRQ(ierr); 773 ierr = PetscFree(mat->data);CHKERRQ(ierr); 774 775 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 776 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 777 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 778 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr); 779 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 780 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpisbstrm_C",NULL);CHKERRQ(ierr); 781 #if defined(PETSC_HAVE_ELEMENTAL) 782 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_elemental_C",NULL);CHKERRQ(ierr); 783 #endif 784 PetscFunctionReturn(0); 785 } 786 787 #undef __FUNCT__ 788 #define __FUNCT__ "MatMult_MPISBAIJ_Hermitian" 789 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy) 790 { 791 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 792 PetscErrorCode ierr; 793 PetscInt nt,mbs=a->mbs,bs=A->rmap->bs; 794 PetscScalar *x,*from; 795 796 PetscFunctionBegin; 797 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 798 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 799 800 /* diagonal part */ 801 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 802 ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr); 803 804 /* subdiagonal part */ 805 ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 806 807 /* copy x into the vec slvec0 */ 808 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 809 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 810 811 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 812 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 813 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 814 815 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 816 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 817 /* supperdiagonal part */ 818 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 819 PetscFunctionReturn(0); 820 } 821 822 #undef __FUNCT__ 823 #define __FUNCT__ "MatMult_MPISBAIJ" 824 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy) 825 { 826 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 827 PetscErrorCode ierr; 828 PetscInt nt,mbs=a->mbs,bs=A->rmap->bs; 829 PetscScalar *from; 830 const PetscScalar *x; 831 832 PetscFunctionBegin; 833 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 834 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 835 836 /* diagonal part */ 837 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 838 ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr); 839 840 /* subdiagonal part */ 841 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 842 843 /* copy x into the vec slvec0 */ 844 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 845 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 846 847 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 848 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 849 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 850 851 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 852 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 853 /* supperdiagonal part */ 854 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 855 PetscFunctionReturn(0); 856 } 857 858 #undef __FUNCT__ 859 #define __FUNCT__ "MatMult_MPISBAIJ_2comm" 860 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy) 861 { 862 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 863 PetscErrorCode ierr; 864 PetscInt nt; 865 866 PetscFunctionBegin; 867 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 868 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 869 870 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 871 if (nt != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 872 873 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 874 /* do diagonal part */ 875 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 876 /* do supperdiagonal part */ 877 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 878 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 879 /* do subdiagonal part */ 880 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 881 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 882 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 883 PetscFunctionReturn(0); 884 } 885 886 #undef __FUNCT__ 887 #define __FUNCT__ "MatMultAdd_MPISBAIJ" 888 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 889 { 890 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 891 PetscErrorCode ierr; 892 PetscInt mbs=a->mbs,bs=A->rmap->bs; 893 PetscScalar *from,zero=0.0; 894 const PetscScalar *x; 895 896 PetscFunctionBegin; 897 /* 898 PetscSynchronizedPrintf(PetscObjectComm((PetscObject)A)," MatMultAdd is called ...\n"); 899 PetscSynchronizedFlush(PetscObjectComm((PetscObject)A),PETSC_STDOUT); 900 */ 901 /* diagonal part */ 902 ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr); 903 ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr); 904 905 /* subdiagonal part */ 906 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 907 908 /* copy x into the vec slvec0 */ 909 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 910 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 911 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 912 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 913 914 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 915 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 916 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 917 918 /* supperdiagonal part */ 919 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr); 920 PetscFunctionReturn(0); 921 } 922 923 #undef __FUNCT__ 924 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm" 925 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz) 926 { 927 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 928 PetscErrorCode ierr; 929 930 PetscFunctionBegin; 931 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 932 /* do diagonal part */ 933 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 934 /* do supperdiagonal part */ 935 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 936 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 937 938 /* do subdiagonal part */ 939 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 940 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 941 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 942 PetscFunctionReturn(0); 943 } 944 945 /* 946 This only works correctly for square matrices where the subblock A->A is the 947 diagonal block 948 */ 949 #undef __FUNCT__ 950 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ" 951 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v) 952 { 953 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 954 PetscErrorCode ierr; 955 956 PetscFunctionBegin; 957 /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */ 958 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 959 PetscFunctionReturn(0); 960 } 961 962 #undef __FUNCT__ 963 #define __FUNCT__ "MatScale_MPISBAIJ" 964 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa) 965 { 966 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 967 PetscErrorCode ierr; 968 969 PetscFunctionBegin; 970 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 971 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 972 PetscFunctionReturn(0); 973 } 974 975 #undef __FUNCT__ 976 #define __FUNCT__ "MatGetRow_MPISBAIJ" 977 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 978 { 979 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 980 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 981 PetscErrorCode ierr; 982 PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 983 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend; 984 PetscInt *cmap,*idx_p,cstart = mat->rstartbs; 985 986 PetscFunctionBegin; 987 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 988 mat->getrowactive = PETSC_TRUE; 989 990 if (!mat->rowvalues && (idx || v)) { 991 /* 992 allocate enough space to hold information from the longest row. 993 */ 994 Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data; 995 Mat_SeqBAIJ *Ba = (Mat_SeqBAIJ*)mat->B->data; 996 PetscInt max = 1,mbs = mat->mbs,tmp; 997 for (i=0; i<mbs; i++) { 998 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */ 999 if (max < tmp) max = tmp; 1000 } 1001 ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr); 1002 } 1003 1004 if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows"); 1005 lrow = row - brstart; /* local row index */ 1006 1007 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1008 if (!v) {pvA = 0; pvB = 0;} 1009 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1010 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1011 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1012 nztot = nzA + nzB; 1013 1014 cmap = mat->garray; 1015 if (v || idx) { 1016 if (nztot) { 1017 /* Sort by increasing column numbers, assuming A and B already sorted */ 1018 PetscInt imark = -1; 1019 if (v) { 1020 *v = v_p = mat->rowvalues; 1021 for (i=0; i<nzB; i++) { 1022 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1023 else break; 1024 } 1025 imark = i; 1026 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1027 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1028 } 1029 if (idx) { 1030 *idx = idx_p = mat->rowindices; 1031 if (imark > -1) { 1032 for (i=0; i<imark; i++) { 1033 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1034 } 1035 } else { 1036 for (i=0; i<nzB; i++) { 1037 if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1038 else break; 1039 } 1040 imark = i; 1041 } 1042 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1043 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1044 } 1045 } else { 1046 if (idx) *idx = 0; 1047 if (v) *v = 0; 1048 } 1049 } 1050 *nz = nztot; 1051 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1052 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1053 PetscFunctionReturn(0); 1054 } 1055 1056 #undef __FUNCT__ 1057 #define __FUNCT__ "MatRestoreRow_MPISBAIJ" 1058 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1059 { 1060 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1061 1062 PetscFunctionBegin; 1063 if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1064 baij->getrowactive = PETSC_FALSE; 1065 PetscFunctionReturn(0); 1066 } 1067 1068 #undef __FUNCT__ 1069 #define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ" 1070 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A) 1071 { 1072 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1073 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1074 1075 PetscFunctionBegin; 1076 aA->getrow_utriangular = PETSC_TRUE; 1077 PetscFunctionReturn(0); 1078 } 1079 #undef __FUNCT__ 1080 #define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ" 1081 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A) 1082 { 1083 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1084 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1085 1086 PetscFunctionBegin; 1087 aA->getrow_utriangular = PETSC_FALSE; 1088 PetscFunctionReturn(0); 1089 } 1090 1091 #undef __FUNCT__ 1092 #define __FUNCT__ "MatRealPart_MPISBAIJ" 1093 PetscErrorCode MatRealPart_MPISBAIJ(Mat A) 1094 { 1095 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1096 PetscErrorCode ierr; 1097 1098 PetscFunctionBegin; 1099 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1100 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1101 PetscFunctionReturn(0); 1102 } 1103 1104 #undef __FUNCT__ 1105 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ" 1106 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A) 1107 { 1108 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1109 PetscErrorCode ierr; 1110 1111 PetscFunctionBegin; 1112 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1113 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1114 PetscFunctionReturn(0); 1115 } 1116 1117 /* Check if isrow is a subset of iscol_local, called by MatGetSubMatrix_MPISBAIJ() 1118 Input: isrow - distributed(parallel), 1119 iscol_local - locally owned (seq) 1120 */ 1121 #undef __FUNCT__ 1122 #define __FUNCT__ "ISEqual_private" 1123 PetscErrorCode ISEqual_private(IS isrow,IS iscol_local,PetscBool *flg) 1124 { 1125 PetscErrorCode ierr; 1126 PetscInt sz1,sz2,*a1,*a2,i,j,k,nmatch; 1127 const PetscInt *ptr1,*ptr2; 1128 1129 PetscFunctionBegin; 1130 ierr = ISGetLocalSize(isrow,&sz1);CHKERRQ(ierr); 1131 ierr = ISGetLocalSize(iscol_local,&sz2);CHKERRQ(ierr); 1132 if (sz1 > sz2) { 1133 *flg = PETSC_FALSE; 1134 PetscFunctionReturn(0); 1135 } 1136 1137 ierr = ISGetIndices(isrow,&ptr1);CHKERRQ(ierr); 1138 ierr = ISGetIndices(iscol_local,&ptr2);CHKERRQ(ierr); 1139 1140 ierr = PetscMalloc1(sz1,&a1);CHKERRQ(ierr); 1141 ierr = PetscMalloc1(sz2,&a2);CHKERRQ(ierr); 1142 ierr = PetscMemcpy(a1,ptr1,sz1*sizeof(PetscInt));CHKERRQ(ierr); 1143 ierr = PetscMemcpy(a2,ptr2,sz2*sizeof(PetscInt));CHKERRQ(ierr); 1144 ierr = PetscSortInt(sz1,a1);CHKERRQ(ierr); 1145 ierr = PetscSortInt(sz2,a2);CHKERRQ(ierr); 1146 1147 nmatch=0; 1148 k = 0; 1149 for (i=0; i<sz1; i++){ 1150 for (j=k; j<sz2; j++){ 1151 if (a1[i] == a2[j]) { 1152 k = j; nmatch++; 1153 break; 1154 } 1155 } 1156 } 1157 ierr = ISRestoreIndices(isrow,&ptr1);CHKERRQ(ierr); 1158 ierr = ISRestoreIndices(iscol_local,&ptr2);CHKERRQ(ierr); 1159 ierr = PetscFree(a1);CHKERRQ(ierr); 1160 ierr = PetscFree(a2);CHKERRQ(ierr); 1161 if (nmatch < sz1) { 1162 *flg = PETSC_FALSE; 1163 } else { 1164 *flg = PETSC_TRUE; 1165 } 1166 PetscFunctionReturn(0); 1167 } 1168 1169 #undef __FUNCT__ 1170 #define __FUNCT__ "MatGetSubMatrix_MPISBAIJ" 1171 PetscErrorCode MatGetSubMatrix_MPISBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 1172 { 1173 PetscErrorCode ierr; 1174 IS iscol_local; 1175 PetscInt csize; 1176 PetscBool isequal; 1177 1178 PetscFunctionBegin; 1179 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1180 if (call == MAT_REUSE_MATRIX) { 1181 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 1182 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 1183 } else { 1184 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 1185 ierr = ISEqual_private(isrow,iscol_local,&isequal);CHKERRQ(ierr); 1186 if (!isequal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isrow"); 1187 } 1188 1189 /* now call MatGetSubMatrix_MPIBAIJ() */ 1190 ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 1191 if (call == MAT_INITIAL_MATRIX) { 1192 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 1193 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 1194 } 1195 PetscFunctionReturn(0); 1196 } 1197 1198 #undef __FUNCT__ 1199 #define __FUNCT__ "MatZeroEntries_MPISBAIJ" 1200 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A) 1201 { 1202 Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data; 1203 PetscErrorCode ierr; 1204 1205 PetscFunctionBegin; 1206 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1207 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1208 PetscFunctionReturn(0); 1209 } 1210 1211 #undef __FUNCT__ 1212 #define __FUNCT__ "MatGetInfo_MPISBAIJ" 1213 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1214 { 1215 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data; 1216 Mat A = a->A,B = a->B; 1217 PetscErrorCode ierr; 1218 PetscReal isend[5],irecv[5]; 1219 1220 PetscFunctionBegin; 1221 info->block_size = (PetscReal)matin->rmap->bs; 1222 1223 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1224 1225 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1226 isend[3] = info->memory; isend[4] = info->mallocs; 1227 1228 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1229 1230 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1231 isend[3] += info->memory; isend[4] += info->mallocs; 1232 if (flag == MAT_LOCAL) { 1233 info->nz_used = isend[0]; 1234 info->nz_allocated = isend[1]; 1235 info->nz_unneeded = isend[2]; 1236 info->memory = isend[3]; 1237 info->mallocs = isend[4]; 1238 } else if (flag == MAT_GLOBAL_MAX) { 1239 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1240 1241 info->nz_used = irecv[0]; 1242 info->nz_allocated = irecv[1]; 1243 info->nz_unneeded = irecv[2]; 1244 info->memory = irecv[3]; 1245 info->mallocs = irecv[4]; 1246 } else if (flag == MAT_GLOBAL_SUM) { 1247 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1248 1249 info->nz_used = irecv[0]; 1250 info->nz_allocated = irecv[1]; 1251 info->nz_unneeded = irecv[2]; 1252 info->memory = irecv[3]; 1253 info->mallocs = irecv[4]; 1254 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1255 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1256 info->fill_ratio_needed = 0; 1257 info->factor_mallocs = 0; 1258 PetscFunctionReturn(0); 1259 } 1260 1261 #undef __FUNCT__ 1262 #define __FUNCT__ "MatSetOption_MPISBAIJ" 1263 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg) 1264 { 1265 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1266 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1267 PetscErrorCode ierr; 1268 1269 PetscFunctionBegin; 1270 switch (op) { 1271 case MAT_NEW_NONZERO_LOCATIONS: 1272 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1273 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1274 case MAT_KEEP_NONZERO_PATTERN: 1275 case MAT_NEW_NONZERO_LOCATION_ERR: 1276 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1277 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1278 break; 1279 case MAT_ROW_ORIENTED: 1280 a->roworiented = flg; 1281 1282 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1283 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1284 break; 1285 case MAT_NEW_DIAGONALS: 1286 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1287 break; 1288 case MAT_IGNORE_OFF_PROC_ENTRIES: 1289 a->donotstash = flg; 1290 break; 1291 case MAT_USE_HASH_TABLE: 1292 a->ht_flag = flg; 1293 break; 1294 case MAT_HERMITIAN: 1295 if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first"); 1296 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1297 1298 A->ops->mult = MatMult_MPISBAIJ_Hermitian; 1299 break; 1300 case MAT_SPD: 1301 A->spd_set = PETSC_TRUE; 1302 A->spd = flg; 1303 if (flg) { 1304 A->symmetric = PETSC_TRUE; 1305 A->structurally_symmetric = PETSC_TRUE; 1306 A->symmetric_set = PETSC_TRUE; 1307 A->structurally_symmetric_set = PETSC_TRUE; 1308 } 1309 break; 1310 case MAT_SYMMETRIC: 1311 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1312 break; 1313 case MAT_STRUCTURALLY_SYMMETRIC: 1314 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1315 break; 1316 case MAT_SYMMETRY_ETERNAL: 1317 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric"); 1318 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1319 break; 1320 case MAT_IGNORE_LOWER_TRIANGULAR: 1321 aA->ignore_ltriangular = flg; 1322 break; 1323 case MAT_ERROR_LOWER_TRIANGULAR: 1324 aA->ignore_ltriangular = flg; 1325 break; 1326 case MAT_GETROW_UPPERTRIANGULAR: 1327 aA->getrow_utriangular = flg; 1328 break; 1329 default: 1330 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1331 } 1332 PetscFunctionReturn(0); 1333 } 1334 1335 #undef __FUNCT__ 1336 #define __FUNCT__ "MatTranspose_MPISBAIJ" 1337 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B) 1338 { 1339 PetscErrorCode ierr; 1340 1341 PetscFunctionBegin; 1342 if (MAT_INITIAL_MATRIX || *B != A) { 1343 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 1344 } 1345 PetscFunctionReturn(0); 1346 } 1347 1348 #undef __FUNCT__ 1349 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ" 1350 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr) 1351 { 1352 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1353 Mat a = baij->A, b=baij->B; 1354 PetscErrorCode ierr; 1355 PetscInt nv,m,n; 1356 PetscBool flg; 1357 1358 PetscFunctionBegin; 1359 if (ll != rr) { 1360 ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr); 1361 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n"); 1362 } 1363 if (!ll) PetscFunctionReturn(0); 1364 1365 ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr); 1366 if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n); 1367 1368 ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr); 1369 if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size"); 1370 1371 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1372 1373 /* left diagonalscale the off-diagonal part */ 1374 ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr); 1375 1376 /* scale the diagonal part */ 1377 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1378 1379 /* right diagonalscale the off-diagonal part */ 1380 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1381 ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr); 1382 PetscFunctionReturn(0); 1383 } 1384 1385 #undef __FUNCT__ 1386 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ" 1387 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A) 1388 { 1389 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1390 PetscErrorCode ierr; 1391 1392 PetscFunctionBegin; 1393 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1394 PetscFunctionReturn(0); 1395 } 1396 1397 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*); 1398 1399 #undef __FUNCT__ 1400 #define __FUNCT__ "MatEqual_MPISBAIJ" 1401 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool *flag) 1402 { 1403 Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data; 1404 Mat a,b,c,d; 1405 PetscBool flg; 1406 PetscErrorCode ierr; 1407 1408 PetscFunctionBegin; 1409 a = matA->A; b = matA->B; 1410 c = matB->A; d = matB->B; 1411 1412 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1413 if (flg) { 1414 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1415 } 1416 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1417 PetscFunctionReturn(0); 1418 } 1419 1420 #undef __FUNCT__ 1421 #define __FUNCT__ "MatCopy_MPISBAIJ" 1422 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str) 1423 { 1424 PetscErrorCode ierr; 1425 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1426 Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)B->data; 1427 1428 PetscFunctionBegin; 1429 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1430 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1431 ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr); 1432 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1433 ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr); 1434 } else { 1435 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1436 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1437 } 1438 PetscFunctionReturn(0); 1439 } 1440 1441 #undef __FUNCT__ 1442 #define __FUNCT__ "MatSetUp_MPISBAIJ" 1443 PetscErrorCode MatSetUp_MPISBAIJ(Mat A) 1444 { 1445 PetscErrorCode ierr; 1446 1447 PetscFunctionBegin; 1448 ierr = MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1449 PetscFunctionReturn(0); 1450 } 1451 1452 #undef __FUNCT__ 1453 #define __FUNCT__ "MatAXPY_MPISBAIJ" 1454 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1455 { 1456 PetscErrorCode ierr; 1457 Mat_MPISBAIJ *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data; 1458 PetscBLASInt bnz,one=1; 1459 Mat_SeqSBAIJ *xa,*ya; 1460 Mat_SeqBAIJ *xb,*yb; 1461 1462 PetscFunctionBegin; 1463 if (str == SAME_NONZERO_PATTERN) { 1464 PetscScalar alpha = a; 1465 xa = (Mat_SeqSBAIJ*)xx->A->data; 1466 ya = (Mat_SeqSBAIJ*)yy->A->data; 1467 ierr = PetscBLASIntCast(xa->nz,&bnz);CHKERRQ(ierr); 1468 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one)); 1469 xb = (Mat_SeqBAIJ*)xx->B->data; 1470 yb = (Mat_SeqBAIJ*)yy->B->data; 1471 ierr = PetscBLASIntCast(xb->nz,&bnz);CHKERRQ(ierr); 1472 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one)); 1473 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 1474 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1475 ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 1476 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1477 ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr); 1478 } else { 1479 Mat B; 1480 PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs; 1481 if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size"); 1482 ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr); 1483 ierr = MatGetRowUpperTriangular(Y);CHKERRQ(ierr); 1484 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 1485 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 1486 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 1487 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 1488 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 1489 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 1490 ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr); 1491 ierr = MatAXPYGetPreallocation_SeqSBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 1492 ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 1493 ierr = MatMPISBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 1494 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 1495 ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr); 1496 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 1497 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 1498 ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr); 1499 ierr = MatRestoreRowUpperTriangular(Y);CHKERRQ(ierr); 1500 } 1501 PetscFunctionReturn(0); 1502 } 1503 1504 #undef __FUNCT__ 1505 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ" 1506 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[]) 1507 { 1508 PetscErrorCode ierr; 1509 PetscInt i; 1510 PetscBool flg; 1511 1512 PetscFunctionBegin; 1513 ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr); 1514 for (i=0; i<n; i++) { 1515 ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr); 1516 if (!flg) { /* *B[i] is non-symmetric, set flag */ 1517 ierr = MatSetOption(*B[i],MAT_SYMMETRIC,PETSC_FALSE);CHKERRQ(ierr); 1518 } 1519 } 1520 PetscFunctionReturn(0); 1521 } 1522 1523 /* -------------------------------------------------------------------*/ 1524 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ, 1525 MatGetRow_MPISBAIJ, 1526 MatRestoreRow_MPISBAIJ, 1527 MatMult_MPISBAIJ, 1528 /* 4*/ MatMultAdd_MPISBAIJ, 1529 MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */ 1530 MatMultAdd_MPISBAIJ, 1531 0, 1532 0, 1533 0, 1534 /* 10*/ 0, 1535 0, 1536 0, 1537 MatSOR_MPISBAIJ, 1538 MatTranspose_MPISBAIJ, 1539 /* 15*/ MatGetInfo_MPISBAIJ, 1540 MatEqual_MPISBAIJ, 1541 MatGetDiagonal_MPISBAIJ, 1542 MatDiagonalScale_MPISBAIJ, 1543 MatNorm_MPISBAIJ, 1544 /* 20*/ MatAssemblyBegin_MPISBAIJ, 1545 MatAssemblyEnd_MPISBAIJ, 1546 MatSetOption_MPISBAIJ, 1547 MatZeroEntries_MPISBAIJ, 1548 /* 24*/ 0, 1549 0, 1550 0, 1551 0, 1552 0, 1553 /* 29*/ MatSetUp_MPISBAIJ, 1554 0, 1555 0, 1556 0, 1557 0, 1558 /* 34*/ MatDuplicate_MPISBAIJ, 1559 0, 1560 0, 1561 0, 1562 0, 1563 /* 39*/ MatAXPY_MPISBAIJ, 1564 MatGetSubMatrices_MPISBAIJ, 1565 MatIncreaseOverlap_MPISBAIJ, 1566 MatGetValues_MPISBAIJ, 1567 MatCopy_MPISBAIJ, 1568 /* 44*/ 0, 1569 MatScale_MPISBAIJ, 1570 0, 1571 0, 1572 0, 1573 /* 49*/ 0, 1574 0, 1575 0, 1576 0, 1577 0, 1578 /* 54*/ 0, 1579 0, 1580 MatSetUnfactored_MPISBAIJ, 1581 0, 1582 MatSetValuesBlocked_MPISBAIJ, 1583 /* 59*/ MatGetSubMatrix_MPISBAIJ, 1584 0, 1585 0, 1586 0, 1587 0, 1588 /* 64*/ 0, 1589 0, 1590 0, 1591 0, 1592 0, 1593 /* 69*/ MatGetRowMaxAbs_MPISBAIJ, 1594 0, 1595 0, 1596 0, 1597 0, 1598 /* 74*/ 0, 1599 0, 1600 0, 1601 0, 1602 0, 1603 /* 79*/ 0, 1604 0, 1605 0, 1606 0, 1607 MatLoad_MPISBAIJ, 1608 /* 84*/ 0, 1609 0, 1610 0, 1611 0, 1612 0, 1613 /* 89*/ 0, 1614 0, 1615 0, 1616 0, 1617 0, 1618 /* 94*/ 0, 1619 0, 1620 0, 1621 0, 1622 0, 1623 /* 99*/ 0, 1624 0, 1625 0, 1626 0, 1627 0, 1628 /*104*/ 0, 1629 MatRealPart_MPISBAIJ, 1630 MatImaginaryPart_MPISBAIJ, 1631 MatGetRowUpperTriangular_MPISBAIJ, 1632 MatRestoreRowUpperTriangular_MPISBAIJ, 1633 /*109*/ 0, 1634 0, 1635 0, 1636 0, 1637 0, 1638 /*114*/ 0, 1639 0, 1640 0, 1641 0, 1642 0, 1643 /*119*/ 0, 1644 0, 1645 0, 1646 0, 1647 0, 1648 /*124*/ 0, 1649 0, 1650 0, 1651 0, 1652 0, 1653 /*129*/ 0, 1654 0, 1655 0, 1656 0, 1657 0, 1658 /*134*/ 0, 1659 0, 1660 0, 1661 0, 1662 0, 1663 /*139*/ 0, 1664 0, 1665 0, 1666 0, 1667 0, 1668 /*144*/MatCreateMPIMatConcatenateSeqMat_MPISBAIJ 1669 }; 1670 1671 #undef __FUNCT__ 1672 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ" 1673 PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a) 1674 { 1675 PetscFunctionBegin; 1676 *a = ((Mat_MPISBAIJ*)A->data)->A; 1677 PetscFunctionReturn(0); 1678 } 1679 1680 #undef __FUNCT__ 1681 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ" 1682 PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz) 1683 { 1684 Mat_MPISBAIJ *b; 1685 PetscErrorCode ierr; 1686 PetscInt i,mbs,Mbs; 1687 1688 PetscFunctionBegin; 1689 ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 1690 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 1691 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 1692 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 1693 1694 b = (Mat_MPISBAIJ*)B->data; 1695 mbs = B->rmap->n/bs; 1696 Mbs = B->rmap->N/bs; 1697 if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->N,bs); 1698 1699 B->rmap->bs = bs; 1700 b->bs2 = bs*bs; 1701 b->mbs = mbs; 1702 b->Mbs = Mbs; 1703 b->nbs = B->cmap->n/bs; 1704 b->Nbs = B->cmap->N/bs; 1705 1706 for (i=0; i<=b->size; i++) { 1707 b->rangebs[i] = B->rmap->range[i]/bs; 1708 } 1709 b->rstartbs = B->rmap->rstart/bs; 1710 b->rendbs = B->rmap->rend/bs; 1711 1712 b->cstartbs = B->cmap->rstart/bs; 1713 b->cendbs = B->cmap->rend/bs; 1714 1715 if (!B->preallocated) { 1716 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 1717 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 1718 ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr); 1719 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 1720 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 1721 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 1722 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 1723 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 1724 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr); 1725 } 1726 1727 ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 1728 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 1729 1730 B->preallocated = PETSC_TRUE; 1731 PetscFunctionReturn(0); 1732 } 1733 1734 #undef __FUNCT__ 1735 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR_MPISBAIJ" 1736 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 1737 { 1738 PetscInt m,rstart,cstart,cend; 1739 PetscInt i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0; 1740 const PetscInt *JJ =0; 1741 PetscScalar *values=0; 1742 PetscErrorCode ierr; 1743 1744 PetscFunctionBegin; 1745 if (bs < 1) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 1746 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 1747 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 1748 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 1749 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 1750 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 1751 m = B->rmap->n/bs; 1752 rstart = B->rmap->rstart/bs; 1753 cstart = B->cmap->rstart/bs; 1754 cend = B->cmap->rend/bs; 1755 1756 if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 1757 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 1758 for (i=0; i<m; i++) { 1759 nz = ii[i+1] - ii[i]; 1760 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 1761 nz_max = PetscMax(nz_max,nz); 1762 JJ = jj + ii[i]; 1763 for (j=0; j<nz; j++) { 1764 if (*JJ >= cstart) break; 1765 JJ++; 1766 } 1767 d = 0; 1768 for (; j<nz; j++) { 1769 if (*JJ++ >= cend) break; 1770 d++; 1771 } 1772 d_nnz[i] = d; 1773 o_nnz[i] = nz - d; 1774 } 1775 ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 1776 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 1777 1778 values = (PetscScalar*)V; 1779 if (!values) { 1780 ierr = PetscMalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr); 1781 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 1782 } 1783 for (i=0; i<m; i++) { 1784 PetscInt row = i + rstart; 1785 PetscInt ncols = ii[i+1] - ii[i]; 1786 const PetscInt *icols = jj + ii[i]; 1787 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 1788 ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 1789 } 1790 1791 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 1792 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1793 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1794 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1795 PetscFunctionReturn(0); 1796 } 1797 1798 /*MC 1799 MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices, 1800 based on block compressed sparse row format. Only the upper triangular portion of the "diagonal" portion of 1801 the matrix is stored. 1802 1803 For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you 1804 can call MatSetOption(Mat, MAT_HERMITIAN); 1805 1806 Options Database Keys: 1807 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions() 1808 1809 Level: beginner 1810 1811 .seealso: MatCreateMPISBAIJ 1812 M*/ 1813 1814 PETSC_EXTERN PetscErrorCode MatConvert_MPISBAIJ_MPISBSTRM(Mat,MatType,MatReuse,Mat*); 1815 1816 #undef __FUNCT__ 1817 #define __FUNCT__ "MatCreate_MPISBAIJ" 1818 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B) 1819 { 1820 Mat_MPISBAIJ *b; 1821 PetscErrorCode ierr; 1822 PetscBool flg = PETSC_FALSE; 1823 1824 PetscFunctionBegin; 1825 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 1826 B->data = (void*)b; 1827 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1828 1829 B->ops->destroy = MatDestroy_MPISBAIJ; 1830 B->ops->view = MatView_MPISBAIJ; 1831 B->assembled = PETSC_FALSE; 1832 B->insertmode = NOT_SET_VALUES; 1833 1834 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 1835 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr); 1836 1837 /* build local table of row and column ownerships */ 1838 ierr = PetscMalloc1(b->size+2,&b->rangebs);CHKERRQ(ierr); 1839 1840 /* build cache for off array entries formed */ 1841 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 1842 1843 b->donotstash = PETSC_FALSE; 1844 b->colmap = NULL; 1845 b->garray = NULL; 1846 b->roworiented = PETSC_TRUE; 1847 1848 /* stuff used in block assembly */ 1849 b->barray = 0; 1850 1851 /* stuff used for matrix vector multiply */ 1852 b->lvec = 0; 1853 b->Mvctx = 0; 1854 b->slvec0 = 0; 1855 b->slvec0b = 0; 1856 b->slvec1 = 0; 1857 b->slvec1a = 0; 1858 b->slvec1b = 0; 1859 b->sMvctx = 0; 1860 1861 /* stuff for MatGetRow() */ 1862 b->rowindices = 0; 1863 b->rowvalues = 0; 1864 b->getrowactive = PETSC_FALSE; 1865 1866 /* hash table stuff */ 1867 b->ht = 0; 1868 b->hd = 0; 1869 b->ht_size = 0; 1870 b->ht_flag = PETSC_FALSE; 1871 b->ht_fact = 0; 1872 b->ht_total_ct = 0; 1873 b->ht_insert_ct = 0; 1874 1875 /* stuff for MatGetSubMatrices_MPIBAIJ_local() */ 1876 b->ijonly = PETSC_FALSE; 1877 1878 b->in_loc = 0; 1879 b->v_loc = 0; 1880 b->n_loc = 0; 1881 1882 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ);CHKERRQ(ierr); 1883 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr); 1884 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr); 1885 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr); 1886 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr); 1887 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpisbstrm_C",MatConvert_MPISBAIJ_MPISBSTRM);CHKERRQ(ierr); 1888 #if defined(PETSC_HAVE_ELEMENTAL) 1889 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_elemental_C",MatConvert_MPISBAIJ_Elemental);CHKERRQ(ierr); 1890 #endif 1891 1892 B->symmetric = PETSC_TRUE; 1893 B->structurally_symmetric = PETSC_TRUE; 1894 B->symmetric_set = PETSC_TRUE; 1895 B->structurally_symmetric_set = PETSC_TRUE; 1896 1897 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr); 1898 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr); 1899 ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",flg,&flg,NULL);CHKERRQ(ierr); 1900 if (flg) { 1901 PetscReal fact = 1.39; 1902 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 1903 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr); 1904 if (fact <= 1.0) fact = 1.39; 1905 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 1906 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 1907 } 1908 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1909 PetscFunctionReturn(0); 1910 } 1911 1912 /*MC 1913 MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices. 1914 1915 This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator, 1916 and MATMPISBAIJ otherwise. 1917 1918 Options Database Keys: 1919 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions() 1920 1921 Level: beginner 1922 1923 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ 1924 M*/ 1925 1926 #undef __FUNCT__ 1927 #define __FUNCT__ "MatMPISBAIJSetPreallocation" 1928 /*@C 1929 MatMPISBAIJSetPreallocation - For good matrix assembly performance 1930 the user should preallocate the matrix storage by setting the parameters 1931 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1932 performance can be increased by more than a factor of 50. 1933 1934 Collective on Mat 1935 1936 Input Parameters: 1937 + B - the matrix 1938 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 1939 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 1940 . d_nz - number of block nonzeros per block row in diagonal portion of local 1941 submatrix (same for all local rows) 1942 . d_nnz - array containing the number of block nonzeros in the various block rows 1943 in the upper triangular and diagonal part of the in diagonal portion of the local 1944 (possibly different for each block row) or NULL. If you plan to factor the matrix you must leave room 1945 for the diagonal entry and set a value even if it is zero. 1946 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1947 submatrix (same for all local rows). 1948 - o_nnz - array containing the number of nonzeros in the various block rows of the 1949 off-diagonal portion of the local submatrix that is right of the diagonal 1950 (possibly different for each block row) or NULL. 1951 1952 1953 Options Database Keys: 1954 . -mat_no_unroll - uses code that does not unroll the loops in the 1955 block calculations (much slower) 1956 . -mat_block_size - size of the blocks to use 1957 1958 Notes: 1959 1960 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1961 than it must be used on all processors that share the object for that argument. 1962 1963 If the *_nnz parameter is given then the *_nz parameter is ignored 1964 1965 Storage Information: 1966 For a square global matrix we define each processor's diagonal portion 1967 to be its local rows and the corresponding columns (a square submatrix); 1968 each processor's off-diagonal portion encompasses the remainder of the 1969 local matrix (a rectangular submatrix). 1970 1971 The user can specify preallocated storage for the diagonal part of 1972 the local submatrix with either d_nz or d_nnz (not both). Set 1973 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 1974 memory allocation. Likewise, specify preallocated storage for the 1975 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1976 1977 You can call MatGetInfo() to get information on how effective the preallocation was; 1978 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 1979 You can also run with the option -info and look for messages with the string 1980 malloc in them to see if additional memory allocation was needed. 1981 1982 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1983 the figure below we depict these three local rows and all columns (0-11). 1984 1985 .vb 1986 0 1 2 3 4 5 6 7 8 9 10 11 1987 -------------------------- 1988 row 3 |. . . d d d o o o o o o 1989 row 4 |. . . d d d o o o o o o 1990 row 5 |. . . d d d o o o o o o 1991 -------------------------- 1992 .ve 1993 1994 Thus, any entries in the d locations are stored in the d (diagonal) 1995 submatrix, and any entries in the o locations are stored in the 1996 o (off-diagonal) submatrix. Note that the d matrix is stored in 1997 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 1998 1999 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 2000 plus the diagonal part of the d matrix, 2001 and o_nz should indicate the number of block nonzeros per row in the o matrix 2002 2003 In general, for PDE problems in which most nonzeros are near the diagonal, 2004 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2005 or you will get TERRIBLE performance; see the users' manual chapter on 2006 matrices. 2007 2008 Level: intermediate 2009 2010 .keywords: matrix, block, aij, compressed row, sparse, parallel 2011 2012 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership() 2013 @*/ 2014 PetscErrorCode MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2015 { 2016 PetscErrorCode ierr; 2017 2018 PetscFunctionBegin; 2019 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2020 PetscValidType(B,1); 2021 PetscValidLogicalCollectiveInt(B,bs,2); 2022 ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 2023 PetscFunctionReturn(0); 2024 } 2025 2026 #undef __FUNCT__ 2027 #define __FUNCT__ "MatCreateSBAIJ" 2028 /*@C 2029 MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format 2030 (block compressed row). For good matrix assembly performance 2031 the user should preallocate the matrix storage by setting the parameters 2032 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2033 performance can be increased by more than a factor of 50. 2034 2035 Collective on MPI_Comm 2036 2037 Input Parameters: 2038 + comm - MPI communicator 2039 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 2040 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 2041 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2042 This value should be the same as the local size used in creating the 2043 y vector for the matrix-vector product y = Ax. 2044 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 2045 This value should be the same as the local size used in creating the 2046 x vector for the matrix-vector product y = Ax. 2047 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2048 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2049 . d_nz - number of block nonzeros per block row in diagonal portion of local 2050 submatrix (same for all local rows) 2051 . d_nnz - array containing the number of block nonzeros in the various block rows 2052 in the upper triangular portion of the in diagonal portion of the local 2053 (possibly different for each block block row) or NULL. 2054 If you plan to factor the matrix you must leave room for the diagonal entry and 2055 set its value even if it is zero. 2056 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2057 submatrix (same for all local rows). 2058 - o_nnz - array containing the number of nonzeros in the various block rows of the 2059 off-diagonal portion of the local submatrix (possibly different for 2060 each block row) or NULL. 2061 2062 Output Parameter: 2063 . A - the matrix 2064 2065 Options Database Keys: 2066 . -mat_no_unroll - uses code that does not unroll the loops in the 2067 block calculations (much slower) 2068 . -mat_block_size - size of the blocks to use 2069 . -mat_mpi - use the parallel matrix data structures even on one processor 2070 (defaults to using SeqBAIJ format on one processor) 2071 2072 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 2073 MatXXXXSetPreallocation() paradgm instead of this routine directly. 2074 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 2075 2076 Notes: 2077 The number of rows and columns must be divisible by blocksize. 2078 This matrix type does not support complex Hermitian operation. 2079 2080 The user MUST specify either the local or global matrix dimensions 2081 (possibly both). 2082 2083 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2084 than it must be used on all processors that share the object for that argument. 2085 2086 If the *_nnz parameter is given then the *_nz parameter is ignored 2087 2088 Storage Information: 2089 For a square global matrix we define each processor's diagonal portion 2090 to be its local rows and the corresponding columns (a square submatrix); 2091 each processor's off-diagonal portion encompasses the remainder of the 2092 local matrix (a rectangular submatrix). 2093 2094 The user can specify preallocated storage for the diagonal part of 2095 the local submatrix with either d_nz or d_nnz (not both). Set 2096 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 2097 memory allocation. Likewise, specify preallocated storage for the 2098 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2099 2100 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2101 the figure below we depict these three local rows and all columns (0-11). 2102 2103 .vb 2104 0 1 2 3 4 5 6 7 8 9 10 11 2105 -------------------------- 2106 row 3 |. . . d d d o o o o o o 2107 row 4 |. . . d d d o o o o o o 2108 row 5 |. . . d d d o o o o o o 2109 -------------------------- 2110 .ve 2111 2112 Thus, any entries in the d locations are stored in the d (diagonal) 2113 submatrix, and any entries in the o locations are stored in the 2114 o (off-diagonal) submatrix. Note that the d matrix is stored in 2115 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 2116 2117 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 2118 plus the diagonal part of the d matrix, 2119 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2120 In general, for PDE problems in which most nonzeros are near the diagonal, 2121 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2122 or you will get TERRIBLE performance; see the users' manual chapter on 2123 matrices. 2124 2125 Level: intermediate 2126 2127 .keywords: matrix, block, aij, compressed row, sparse, parallel 2128 2129 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ() 2130 @*/ 2131 2132 PetscErrorCode MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2133 { 2134 PetscErrorCode ierr; 2135 PetscMPIInt size; 2136 2137 PetscFunctionBegin; 2138 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2139 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2140 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2141 if (size > 1) { 2142 ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr); 2143 ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2144 } else { 2145 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 2146 ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2147 } 2148 PetscFunctionReturn(0); 2149 } 2150 2151 2152 #undef __FUNCT__ 2153 #define __FUNCT__ "MatDuplicate_MPISBAIJ" 2154 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2155 { 2156 Mat mat; 2157 Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data; 2158 PetscErrorCode ierr; 2159 PetscInt len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs; 2160 PetscScalar *array; 2161 2162 PetscFunctionBegin; 2163 *newmat = 0; 2164 2165 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2166 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2167 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2168 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2169 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2170 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2171 2172 mat->factortype = matin->factortype; 2173 mat->preallocated = PETSC_TRUE; 2174 mat->assembled = PETSC_TRUE; 2175 mat->insertmode = NOT_SET_VALUES; 2176 2177 a = (Mat_MPISBAIJ*)mat->data; 2178 a->bs2 = oldmat->bs2; 2179 a->mbs = oldmat->mbs; 2180 a->nbs = oldmat->nbs; 2181 a->Mbs = oldmat->Mbs; 2182 a->Nbs = oldmat->Nbs; 2183 2184 2185 a->size = oldmat->size; 2186 a->rank = oldmat->rank; 2187 a->donotstash = oldmat->donotstash; 2188 a->roworiented = oldmat->roworiented; 2189 a->rowindices = 0; 2190 a->rowvalues = 0; 2191 a->getrowactive = PETSC_FALSE; 2192 a->barray = 0; 2193 a->rstartbs = oldmat->rstartbs; 2194 a->rendbs = oldmat->rendbs; 2195 a->cstartbs = oldmat->cstartbs; 2196 a->cendbs = oldmat->cendbs; 2197 2198 /* hash table stuff */ 2199 a->ht = 0; 2200 a->hd = 0; 2201 a->ht_size = 0; 2202 a->ht_flag = oldmat->ht_flag; 2203 a->ht_fact = oldmat->ht_fact; 2204 a->ht_total_ct = 0; 2205 a->ht_insert_ct = 0; 2206 2207 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr); 2208 if (oldmat->colmap) { 2209 #if defined(PETSC_USE_CTABLE) 2210 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2211 #else 2212 ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr); 2213 ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2214 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2215 #endif 2216 } else a->colmap = 0; 2217 2218 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2219 ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr); 2220 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2221 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 2222 } else a->garray = 0; 2223 2224 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 2225 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2226 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2227 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2228 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2229 2230 ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr); 2231 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr); 2232 ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr); 2233 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr); 2234 2235 ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr); 2236 ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr); 2237 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr); 2238 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr); 2239 ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr); 2240 ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr); 2241 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr); 2242 ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr); 2243 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr); 2244 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr); 2245 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b);CHKERRQ(ierr); 2246 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a);CHKERRQ(ierr); 2247 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b);CHKERRQ(ierr); 2248 2249 /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */ 2250 ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr); 2251 a->sMvctx = oldmat->sMvctx; 2252 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx);CHKERRQ(ierr); 2253 2254 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2255 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2256 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2257 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2258 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2259 *newmat = mat; 2260 PetscFunctionReturn(0); 2261 } 2262 2263 #undef __FUNCT__ 2264 #define __FUNCT__ "MatLoad_MPISBAIJ" 2265 PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer) 2266 { 2267 PetscErrorCode ierr; 2268 PetscInt i,nz,j,rstart,rend; 2269 PetscScalar *vals,*buf; 2270 MPI_Comm comm; 2271 MPI_Status status; 2272 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,mmbs; 2273 PetscInt header[4],*rowlengths = 0,M,N,m,*cols,*locrowlens; 2274 PetscInt *procsnz = 0,jj,*mycols,*ibuf; 2275 PetscInt bs = newmat->rmap->bs,Mbs,mbs,extra_rows; 2276 PetscInt *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2277 PetscInt dcount,kmax,k,nzcount,tmp,sizesset=1,grows,gcols; 2278 int fd; 2279 2280 PetscFunctionBegin; 2281 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 2282 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr); 2283 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 2284 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2285 if (bs < 0) bs = 1; 2286 2287 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2288 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2289 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2290 if (!rank) { 2291 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 2292 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2293 if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ"); 2294 } 2295 2296 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0; 2297 2298 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2299 M = header[1]; 2300 N = header[2]; 2301 2302 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 2303 if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M; 2304 if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N; 2305 2306 /* If global sizes are set, check if they are consistent with that given in the file */ 2307 if (sizesset) { 2308 ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr); 2309 } 2310 if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 2311 if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 2312 2313 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 2314 2315 /* 2316 This code adds extra rows to make sure the number of rows is 2317 divisible by the blocksize 2318 */ 2319 Mbs = M/bs; 2320 extra_rows = bs - M + bs*(Mbs); 2321 if (extra_rows == bs) extra_rows = 0; 2322 else Mbs++; 2323 if (extra_rows &&!rank) { 2324 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 2325 } 2326 2327 /* determine ownership of all rows */ 2328 if (newmat->rmap->n < 0) { /* PETSC_DECIDE */ 2329 mbs = Mbs/size + ((Mbs % size) > rank); 2330 m = mbs*bs; 2331 } else { /* User Set */ 2332 m = newmat->rmap->n; 2333 mbs = m/bs; 2334 } 2335 ierr = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr); 2336 ierr = PetscMPIIntCast(mbs,&mmbs);CHKERRQ(ierr); 2337 ierr = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2338 rowners[0] = 0; 2339 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2340 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2341 rstart = rowners[rank]; 2342 rend = rowners[rank+1]; 2343 2344 /* distribute row lengths to all processors */ 2345 ierr = PetscMalloc1((rend-rstart)*bs,&locrowlens);CHKERRQ(ierr); 2346 if (!rank) { 2347 ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr); 2348 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2349 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2350 ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr); 2351 for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2352 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2353 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2354 } else { 2355 ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2356 } 2357 2358 if (!rank) { /* procs[0] */ 2359 /* calculate the number of nonzeros on each processor */ 2360 ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr); 2361 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2362 for (i=0; i<size; i++) { 2363 for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 2364 procsnz[i] += rowlengths[j]; 2365 } 2366 } 2367 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2368 2369 /* determine max buffer needed and allocate it */ 2370 maxnz = 0; 2371 for (i=0; i<size; i++) { 2372 maxnz = PetscMax(maxnz,procsnz[i]); 2373 } 2374 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 2375 2376 /* read in my part of the matrix column indices */ 2377 nz = procsnz[0]; 2378 ierr = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr); 2379 mycols = ibuf; 2380 if (size == 1) nz -= extra_rows; 2381 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2382 if (size == 1) { 2383 for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i; 2384 } 2385 2386 /* read in every ones (except the last) and ship off */ 2387 for (i=1; i<size-1; i++) { 2388 nz = procsnz[i]; 2389 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2390 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2391 } 2392 /* read in the stuff for the last proc */ 2393 if (size != 1) { 2394 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2395 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2396 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2397 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 2398 } 2399 ierr = PetscFree(cols);CHKERRQ(ierr); 2400 } else { /* procs[i], i>0 */ 2401 /* determine buffer space needed for message */ 2402 nz = 0; 2403 for (i=0; i<m; i++) nz += locrowlens[i]; 2404 ierr = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr); 2405 mycols = ibuf; 2406 /* receive message of column indices*/ 2407 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2408 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2409 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2410 } 2411 2412 /* loop over local rows, determining number of off diagonal entries */ 2413 ierr = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr); 2414 ierr = PetscMalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr); 2415 ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2416 ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2417 ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2418 rowcount = 0; 2419 nzcount = 0; 2420 for (i=0; i<mbs; i++) { 2421 dcount = 0; 2422 odcount = 0; 2423 for (j=0; j<bs; j++) { 2424 kmax = locrowlens[rowcount]; 2425 for (k=0; k<kmax; k++) { 2426 tmp = mycols[nzcount++]/bs; /* block col. index */ 2427 if (!mask[tmp]) { 2428 mask[tmp] = 1; 2429 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */ 2430 else masked1[dcount++] = tmp; /* entry in diag portion */ 2431 } 2432 } 2433 rowcount++; 2434 } 2435 2436 dlens[i] = dcount; /* d_nzz[i] */ 2437 odlens[i] = odcount; /* o_nzz[i] */ 2438 2439 /* zero out the mask elements we set */ 2440 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2441 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2442 } 2443 if (!sizesset) { 2444 ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 2445 } 2446 ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr); 2447 ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 2448 2449 if (!rank) { 2450 ierr = PetscMalloc1(maxnz,&buf);CHKERRQ(ierr); 2451 /* read in my part of the matrix numerical values */ 2452 nz = procsnz[0]; 2453 vals = buf; 2454 mycols = ibuf; 2455 if (size == 1) nz -= extra_rows; 2456 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2457 if (size == 1) { 2458 for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0; 2459 } 2460 2461 /* insert into matrix */ 2462 jj = rstart*bs; 2463 for (i=0; i<m; i++) { 2464 ierr = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2465 mycols += locrowlens[i]; 2466 vals += locrowlens[i]; 2467 jj++; 2468 } 2469 2470 /* read in other processors (except the last one) and ship out */ 2471 for (i=1; i<size-1; i++) { 2472 nz = procsnz[i]; 2473 vals = buf; 2474 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2475 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 2476 } 2477 /* the last proc */ 2478 if (size != 1) { 2479 nz = procsnz[i] - extra_rows; 2480 vals = buf; 2481 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2482 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2483 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 2484 } 2485 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2486 2487 } else { 2488 /* receive numeric values */ 2489 ierr = PetscMalloc1(nz,&buf);CHKERRQ(ierr); 2490 2491 /* receive message of values*/ 2492 vals = buf; 2493 mycols = ibuf; 2494 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr); 2495 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2496 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2497 2498 /* insert into matrix */ 2499 jj = rstart*bs; 2500 for (i=0; i<m; i++) { 2501 ierr = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2502 mycols += locrowlens[i]; 2503 vals += locrowlens[i]; 2504 jj++; 2505 } 2506 } 2507 2508 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2509 ierr = PetscFree(buf);CHKERRQ(ierr); 2510 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2511 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 2512 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 2513 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 2514 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2515 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2516 PetscFunctionReturn(0); 2517 } 2518 2519 #undef __FUNCT__ 2520 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor" 2521 /*XXXXX@ 2522 MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2523 2524 Input Parameters: 2525 . mat - the matrix 2526 . fact - factor 2527 2528 Not Collective on Mat, each process can have a different hash factor 2529 2530 Level: advanced 2531 2532 Notes: 2533 This can also be set by the command line option: -mat_use_hash_table fact 2534 2535 .keywords: matrix, hashtable, factor, HT 2536 2537 .seealso: MatSetOption() 2538 @XXXXX*/ 2539 2540 2541 #undef __FUNCT__ 2542 #define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ" 2543 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[]) 2544 { 2545 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 2546 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(a->B)->data; 2547 PetscReal atmp; 2548 PetscReal *work,*svalues,*rvalues; 2549 PetscErrorCode ierr; 2550 PetscInt i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol; 2551 PetscMPIInt rank,size; 2552 PetscInt *rowners_bs,dest,count,source; 2553 PetscScalar *va; 2554 MatScalar *ba; 2555 MPI_Status stat; 2556 2557 PetscFunctionBegin; 2558 if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov"); 2559 ierr = MatGetRowMaxAbs(a->A,v,NULL);CHKERRQ(ierr); 2560 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2561 2562 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 2563 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 2564 2565 bs = A->rmap->bs; 2566 mbs = a->mbs; 2567 Mbs = a->Mbs; 2568 ba = b->a; 2569 bi = b->i; 2570 bj = b->j; 2571 2572 /* find ownerships */ 2573 rowners_bs = A->rmap->range; 2574 2575 /* each proc creates an array to be distributed */ 2576 ierr = PetscMalloc1(bs*Mbs,&work);CHKERRQ(ierr); 2577 ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr); 2578 2579 /* row_max for B */ 2580 if (rank != size-1) { 2581 for (i=0; i<mbs; i++) { 2582 ncols = bi[1] - bi[0]; bi++; 2583 brow = bs*i; 2584 for (j=0; j<ncols; j++) { 2585 bcol = bs*(*bj); 2586 for (kcol=0; kcol<bs; kcol++) { 2587 col = bcol + kcol; /* local col index */ 2588 col += rowners_bs[rank+1]; /* global col index */ 2589 for (krow=0; krow<bs; krow++) { 2590 atmp = PetscAbsScalar(*ba); ba++; 2591 row = brow + krow; /* local row index */ 2592 if (PetscRealPart(va[row]) < atmp) va[row] = atmp; 2593 if (work[col] < atmp) work[col] = atmp; 2594 } 2595 } 2596 bj++; 2597 } 2598 } 2599 2600 /* send values to its owners */ 2601 for (dest=rank+1; dest<size; dest++) { 2602 svalues = work + rowners_bs[dest]; 2603 count = rowners_bs[dest+1]-rowners_bs[dest]; 2604 ierr = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2605 } 2606 } 2607 2608 /* receive values */ 2609 if (rank) { 2610 rvalues = work; 2611 count = rowners_bs[rank+1]-rowners_bs[rank]; 2612 for (source=0; source<rank; source++) { 2613 ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat);CHKERRQ(ierr); 2614 /* process values */ 2615 for (i=0; i<count; i++) { 2616 if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i]; 2617 } 2618 } 2619 } 2620 2621 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2622 ierr = PetscFree(work);CHKERRQ(ierr); 2623 PetscFunctionReturn(0); 2624 } 2625 2626 #undef __FUNCT__ 2627 #define __FUNCT__ "MatSOR_MPISBAIJ" 2628 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2629 { 2630 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2631 PetscErrorCode ierr; 2632 PetscInt mbs=mat->mbs,bs=matin->rmap->bs; 2633 PetscScalar *x,*ptr,*from; 2634 Vec bb1; 2635 const PetscScalar *b; 2636 2637 PetscFunctionBegin; 2638 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 2639 if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2640 2641 if (flag == SOR_APPLY_UPPER) { 2642 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2643 PetscFunctionReturn(0); 2644 } 2645 2646 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2647 if (flag & SOR_ZERO_INITIAL_GUESS) { 2648 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2649 its--; 2650 } 2651 2652 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2653 while (its--) { 2654 2655 /* lower triangular part: slvec0b = - B^T*xx */ 2656 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2657 2658 /* copy xx into slvec0a */ 2659 ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2660 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2661 ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2662 ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2663 2664 ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr); 2665 2666 /* copy bb into slvec1a */ 2667 ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2668 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 2669 ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2670 ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2671 2672 /* set slvec1b = 0 */ 2673 ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr); 2674 2675 ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2676 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2677 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 2678 ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2679 2680 /* upper triangular part: bb1 = bb1 - B*x */ 2681 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr); 2682 2683 /* local diagonal sweep */ 2684 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2685 } 2686 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 2687 } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 2688 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2689 } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 2690 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2691 } else if (flag & SOR_EISENSTAT) { 2692 Vec xx1; 2693 PetscBool hasop; 2694 const PetscScalar *diag; 2695 PetscScalar *sl,scale = (omega - 2.0)/omega; 2696 PetscInt i,n; 2697 2698 if (!mat->xx1) { 2699 ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr); 2700 ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr); 2701 } 2702 xx1 = mat->xx1; 2703 bb1 = mat->bb1; 2704 2705 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 2706 2707 if (!mat->diag) { 2708 /* this is wrong for same matrix with new nonzero values */ 2709 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 2710 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 2711 } 2712 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 2713 2714 if (hasop) { 2715 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 2716 ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr); 2717 } else { 2718 /* 2719 These two lines are replaced by code that may be a bit faster for a good compiler 2720 ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr); 2721 ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr); 2722 */ 2723 ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr); 2724 ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr); 2725 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 2726 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2727 ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr); 2728 if (omega == 1.0) { 2729 for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i]; 2730 ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr); 2731 } else { 2732 for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i]; 2733 ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr); 2734 } 2735 ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr); 2736 ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr); 2737 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 2738 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2739 } 2740 2741 /* multiply off-diagonal portion of matrix */ 2742 ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr); 2743 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2744 ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr); 2745 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2746 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2747 ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr); 2748 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2749 ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2750 ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2751 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr); 2752 2753 /* local sweep */ 2754 ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 2755 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 2756 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2757 PetscFunctionReturn(0); 2758 } 2759 2760 #undef __FUNCT__ 2761 #define __FUNCT__ "MatSOR_MPISBAIJ_2comm" 2762 PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2763 { 2764 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2765 PetscErrorCode ierr; 2766 Vec lvec1,bb1; 2767 2768 PetscFunctionBegin; 2769 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 2770 if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2771 2772 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2773 if (flag & SOR_ZERO_INITIAL_GUESS) { 2774 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2775 its--; 2776 } 2777 2778 ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr); 2779 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2780 while (its--) { 2781 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2782 2783 /* lower diagonal part: bb1 = bb - B^T*xx */ 2784 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr); 2785 ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr); 2786 2787 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2788 ierr = VecCopy(bb,bb1);CHKERRQ(ierr); 2789 ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2790 2791 /* upper diagonal part: bb1 = bb1 - B*x */ 2792 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2793 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 2794 2795 ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2796 2797 /* diagonal sweep */ 2798 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2799 } 2800 ierr = VecDestroy(&lvec1);CHKERRQ(ierr); 2801 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 2802 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2803 PetscFunctionReturn(0); 2804 } 2805 2806 #undef __FUNCT__ 2807 #define __FUNCT__ "MatCreateMPISBAIJWithArrays" 2808 /*@ 2809 MatCreateMPISBAIJWithArrays - creates a MPI SBAIJ matrix using arrays that contain in standard 2810 CSR format the local rows. 2811 2812 Collective on MPI_Comm 2813 2814 Input Parameters: 2815 + comm - MPI communicator 2816 . bs - the block size, only a block size of 1 is supported 2817 . m - number of local rows (Cannot be PETSC_DECIDE) 2818 . n - This value should be the same as the local size used in creating the 2819 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 2820 calculated if N is given) For square matrices n is almost always m. 2821 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2822 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2823 . i - row indices 2824 . j - column indices 2825 - a - matrix values 2826 2827 Output Parameter: 2828 . mat - the matrix 2829 2830 Level: intermediate 2831 2832 Notes: 2833 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 2834 thus you CANNOT change the matrix entries by changing the values of a[] after you have 2835 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 2836 2837 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 2838 2839 .keywords: matrix, aij, compressed row, sparse, parallel 2840 2841 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 2842 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 2843 @*/ 2844 PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 2845 { 2846 PetscErrorCode ierr; 2847 2848 2849 PetscFunctionBegin; 2850 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 2851 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 2852 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 2853 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 2854 ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr); 2855 ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr); 2856 PetscFunctionReturn(0); 2857 } 2858 2859 2860 #undef __FUNCT__ 2861 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR" 2862 /*@C 2863 MatMPISBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format 2864 (the default parallel PETSc format). 2865 2866 Collective on MPI_Comm 2867 2868 Input Parameters: 2869 + B - the matrix 2870 . bs - the block size 2871 . i - the indices into j for the start of each local row (starts with zero) 2872 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2873 - v - optional values in the matrix 2874 2875 Level: developer 2876 2877 .keywords: matrix, aij, compressed row, sparse, parallel 2878 2879 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ 2880 @*/ 2881 PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2882 { 2883 PetscErrorCode ierr; 2884 2885 PetscFunctionBegin; 2886 ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 2887 PetscFunctionReturn(0); 2888 } 2889 2890 #undef __FUNCT__ 2891 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPISBAIJ" 2892 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 2893 { 2894 PetscErrorCode ierr; 2895 PetscInt m,N,i,rstart,nnz,Ii,bs,cbs; 2896 PetscInt *indx; 2897 PetscScalar *values; 2898 2899 PetscFunctionBegin; 2900 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 2901 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 2902 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inmat->data; 2903 PetscInt *dnz,*onz,sum,bs,cbs,mbs,Nbs; 2904 PetscInt *bindx,rmax=a->rmax,j; 2905 2906 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 2907 mbs = m/bs; Nbs = N/cbs; 2908 if (n == PETSC_DECIDE) { 2909 ierr = PetscSplitOwnership(comm,&n,&Nbs);CHKERRQ(ierr); 2910 } 2911 /* Check sum(n) = Nbs */ 2912 ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2913 if (sum != Nbs) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",Nbs); 2914 2915 ierr = MPI_Scan(&mbs, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2916 rstart -= mbs; 2917 2918 ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr); 2919 ierr = MatPreallocateInitialize(comm,mbs,n,dnz,onz);CHKERRQ(ierr); 2920 ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 2921 for (i=0; i<mbs; i++) { 2922 ierr = MatGetRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */ 2923 nnz = nnz/bs; 2924 for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs; 2925 ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr); 2926 ierr = MatRestoreRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); 2927 } 2928 ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr); 2929 ierr = PetscFree(bindx);CHKERRQ(ierr); 2930 2931 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 2932 ierr = MatSetSizes(*outmat,m,n*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2933 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 2934 ierr = MatSetType(*outmat,MATMPISBAIJ);CHKERRQ(ierr); 2935 ierr = MatMPISBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr); 2936 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2937 } 2938 2939 /* numeric phase */ 2940 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 2941 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 2942 2943 ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 2944 for (i=0; i<m; i++) { 2945 ierr = MatGetRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2946 Ii = i + rstart; 2947 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2948 ierr = MatRestoreRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2949 } 2950 ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr); 2951 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2952 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2953 PetscFunctionReturn(0); 2954 } 2955