1 2 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/ 3 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 4 #include <../src/mat/impls/sbaij/seq/sbaij.h> 5 #include <petscblaslapack.h> 6 7 #if defined(PETSC_HAVE_ELEMENTAL) 8 PETSC_EXTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 9 #endif 10 #undef __FUNCT__ 11 #define __FUNCT__ "MatStoreValues_MPISBAIJ" 12 PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat) 13 { 14 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ*)mat->data; 15 PetscErrorCode ierr; 16 17 PetscFunctionBegin; 18 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 19 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 20 PetscFunctionReturn(0); 21 } 22 23 #undef __FUNCT__ 24 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ" 25 PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat) 26 { 27 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ*)mat->data; 28 PetscErrorCode ierr; 29 30 PetscFunctionBegin; 31 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 32 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 33 PetscFunctionReturn(0); 34 } 35 36 #define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \ 37 { \ 38 \ 39 brow = row/bs; \ 40 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 41 rmax = aimax[brow]; nrow = ailen[brow]; \ 42 bcol = col/bs; \ 43 ridx = row % bs; cidx = col % bs; \ 44 low = 0; high = nrow; \ 45 while (high-low > 3) { \ 46 t = (low+high)/2; \ 47 if (rp[t] > bcol) high = t; \ 48 else low = t; \ 49 } \ 50 for (_i=low; _i<high; _i++) { \ 51 if (rp[_i] > bcol) break; \ 52 if (rp[_i] == bcol) { \ 53 bap = ap + bs2*_i + bs*cidx + ridx; \ 54 if (addv == ADD_VALUES) *bap += value; \ 55 else *bap = value; \ 56 goto a_noinsert; \ 57 } \ 58 } \ 59 if (a->nonew == 1) goto a_noinsert; \ 60 if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 61 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 62 N = nrow++ - 1; \ 63 /* shift up all the later entries in this row */ \ 64 for (ii=N; ii>=_i; ii--) { \ 65 rp[ii+1] = rp[ii]; \ 66 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 67 } \ 68 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 69 rp[_i] = bcol; \ 70 ap[bs2*_i + bs*cidx + ridx] = value; \ 71 A->nonzerostate++;\ 72 a_noinsert:; \ 73 ailen[brow] = nrow; \ 74 } 75 76 #define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \ 77 { \ 78 brow = row/bs; \ 79 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 80 rmax = bimax[brow]; nrow = bilen[brow]; \ 81 bcol = col/bs; \ 82 ridx = row % bs; cidx = col % bs; \ 83 low = 0; high = nrow; \ 84 while (high-low > 3) { \ 85 t = (low+high)/2; \ 86 if (rp[t] > bcol) high = t; \ 87 else low = t; \ 88 } \ 89 for (_i=low; _i<high; _i++) { \ 90 if (rp[_i] > bcol) break; \ 91 if (rp[_i] == bcol) { \ 92 bap = ap + bs2*_i + bs*cidx + ridx; \ 93 if (addv == ADD_VALUES) *bap += value; \ 94 else *bap = value; \ 95 goto b_noinsert; \ 96 } \ 97 } \ 98 if (b->nonew == 1) goto b_noinsert; \ 99 if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 100 MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 101 N = nrow++ - 1; \ 102 /* shift up all the later entries in this row */ \ 103 for (ii=N; ii>=_i; ii--) { \ 104 rp[ii+1] = rp[ii]; \ 105 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 106 } \ 107 if 
(N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 108 rp[_i] = bcol; \ 109 ap[bs2*_i + bs*cidx + ridx] = value; \ 110 B->nonzerostate++;\ 111 b_noinsert:; \ 112 bilen[brow] = nrow; \ 113 } 114 115 /* Only add/insert a(i,j) with i<=j (blocks). 116 Any a(i,j) with i>j input by the user is ignored. 117 */ 118 #undef __FUNCT__ 119 #define __FUNCT__ "MatSetValues_MPISBAIJ" 120 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 121 { 122 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 123 MatScalar value; 124 PetscBool roworiented = baij->roworiented; 125 PetscErrorCode ierr; 126 PetscInt i,j,row,col; 127 PetscInt rstart_orig=mat->rmap->rstart; 128 PetscInt rend_orig =mat->rmap->rend,cstart_orig=mat->cmap->rstart; 129 PetscInt cend_orig =mat->cmap->rend,bs=mat->rmap->bs; 130 131 /* Some Variables required in the macro */ 132 Mat A = baij->A; 133 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data; 134 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 135 MatScalar *aa =a->a; 136 137 Mat B = baij->B; 138 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 139 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 140 MatScalar *ba =b->a; 141 142 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 143 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 144 MatScalar *ap,*bap; 145 146 /* for stash */ 147 PetscInt n_loc, *in_loc = NULL; 148 MatScalar *v_loc = NULL; 149 150 PetscFunctionBegin; 151 if (!baij->donotstash) { 152 if (n > baij->n_loc) { 153 ierr = PetscFree(baij->in_loc);CHKERRQ(ierr); 154 ierr = PetscFree(baij->v_loc);CHKERRQ(ierr); 155 ierr = PetscMalloc1(n,&baij->in_loc);CHKERRQ(ierr); 156 ierr = PetscMalloc1(n,&baij->v_loc);CHKERRQ(ierr); 157 158 baij->n_loc = n; 159 } 160 in_loc = baij->in_loc; 161 v_loc = baij->v_loc; 162 } 163 164 for (i=0; i<m; i++) { 165 if (im[i] < 0) continue; 166 #if defined(PETSC_USE_DEBUG) 167 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 168 #endif 169 if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */ 170 row = im[i] - rstart_orig; /* local row index */ 171 for (j=0; j<n; j++) { 172 if (im[i]/bs > in[j]/bs) { 173 if (a->ignore_ltriangular) { 174 continue; /* ignore lower triangular blocks */ 175 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. 
Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 176 } 177 if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */ 178 col = in[j] - cstart_orig; /* local col index */ 179 brow = row/bs; bcol = col/bs; 180 if (brow > bcol) continue; /* ignore lower triangular blocks of A */ 181 if (roworiented) value = v[i*n+j]; 182 else value = v[i+j*m]; 183 MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv); 184 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 185 } else if (in[j] < 0) continue; 186 #if defined(PETSC_USE_DEBUG) 187 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 188 #endif 189 else { /* off-diag entry (B) */ 190 if (mat->was_assembled) { 191 if (!baij->colmap) { 192 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 193 } 194 #if defined(PETSC_USE_CTABLE) 195 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 196 col = col - 1; 197 #else 198 col = baij->colmap[in[j]/bs] - 1; 199 #endif 200 if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) { 201 ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 202 col = in[j]; 203 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 204 B = baij->B; 205 b = (Mat_SeqBAIJ*)(B)->data; 206 bimax= b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 207 ba = b->a; 208 } else col += in[j]%bs; 209 } else col = in[j]; 210 if (roworiented) value = v[i*n+j]; 211 else value = v[i+j*m]; 212 MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv); 213 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 214 } 215 } 216 } else { /* off processor entry */ 217 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 218 if (!baij->donotstash) { 219 mat->assembled = PETSC_FALSE; 220 n_loc = 0; 221 for (j=0; j<n; j++) { 222 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 223 in_loc[n_loc] = in[j]; 224 if (roworiented) { 225 v_loc[n_loc] = v[i*n+j]; 226 } else { 227 v_loc[n_loc] = v[j*m+i]; 228 } 229 n_loc++; 230 } 231 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr); 232 } 233 } 234 } 235 PetscFunctionReturn(0); 236 } 237 238 #undef __FUNCT__ 239 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ" 240 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 241 { 242 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 243 const MatScalar *value; 244 MatScalar *barray =baij->barray; 245 PetscBool roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular; 246 PetscErrorCode ierr; 247 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 248 PetscInt rend=baij->rendbs,cstart=baij->rstartbs,stepval; 249 PetscInt cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2; 250 251 PetscFunctionBegin; 252 if (!barray) { 253 ierr = PetscMalloc1(bs2,&barray);CHKERRQ(ierr); 254 baij->barray = barray; 255 } 256 257 if (roworiented) { 258 stepval = (n-1)*bs; 259 } else { 260 stepval = (m-1)*bs; 261 } 262 for (i=0; i<m; i++) { 263 if (im[i] < 0) continue; 264 #if defined(PETSC_USE_DEBUG) 265 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max 
%D",im[i],baij->Mbs-1); 266 #endif 267 if (im[i] >= rstart && im[i] < rend) { 268 row = im[i] - rstart; 269 for (j=0; j<n; j++) { 270 if (im[i] > in[j]) { 271 if (ignore_ltriangular) continue; /* ignore lower triangular blocks */ 272 else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 273 } 274 /* If NumCol = 1 then a copy is not required */ 275 if ((roworiented) && (n == 1)) { 276 barray = (MatScalar*) v + i*bs2; 277 } else if ((!roworiented) && (m == 1)) { 278 barray = (MatScalar*) v + j*bs2; 279 } else { /* Here a copy is required */ 280 if (roworiented) { 281 value = v + i*(stepval+bs)*bs + j*bs; 282 } else { 283 value = v + j*(stepval+bs)*bs + i*bs; 284 } 285 for (ii=0; ii<bs; ii++,value+=stepval) { 286 for (jj=0; jj<bs; jj++) { 287 *barray++ = *value++; 288 } 289 } 290 barray -=bs2; 291 } 292 293 if (in[j] >= cstart && in[j] < cend) { 294 col = in[j] - cstart; 295 ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 296 } else if (in[j] < 0) continue; 297 #if defined(PETSC_USE_DEBUG) 298 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 299 #endif 300 else { 301 if (mat->was_assembled) { 302 if (!baij->colmap) { 303 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 304 } 305 306 #if defined(PETSC_USE_DEBUG) 307 #if defined(PETSC_USE_CTABLE) 308 { PetscInt data; 309 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 310 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 311 } 312 #else 313 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 314 #endif 315 #endif 316 #if defined(PETSC_USE_CTABLE) 317 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 318 col = (col - 1)/bs; 319 #else 320 col = (baij->colmap[in[j]] - 1)/bs; 321 #endif 322 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 323 ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 324 col = in[j]; 325 } 326 } else col = in[j]; 327 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 328 } 329 } 330 } else { 331 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 332 if (!baij->donotstash) { 333 if (roworiented) { 334 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 335 } else { 336 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 337 } 338 } 339 } 340 } 341 PetscFunctionReturn(0); 342 } 343 344 #undef __FUNCT__ 345 #define __FUNCT__ "MatGetValues_MPISBAIJ" 346 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 347 { 348 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 349 PetscErrorCode ierr; 350 PetscInt bs = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend; 351 PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data; 352 353 PetscFunctionBegin; 354 for (i=0; i<m; i++) { 355 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */ 356 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row 
too large: row %D max %D",idxm[i],mat->rmap->N-1); 357 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 358 row = idxm[i] - bsrstart; 359 for (j=0; j<n; j++) { 360 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */ 361 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 362 if (idxn[j] >= bscstart && idxn[j] < bscend) { 363 col = idxn[j] - bscstart; 364 ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 365 } else { 366 if (!baij->colmap) { 367 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 368 } 369 #if defined(PETSC_USE_CTABLE) 370 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 371 data--; 372 #else 373 data = baij->colmap[idxn[j]/bs]-1; 374 #endif 375 if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 376 else { 377 col = data + idxn[j]%bs; 378 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 379 } 380 } 381 } 382 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 383 } 384 PetscFunctionReturn(0); 385 } 386 387 #undef __FUNCT__ 388 #define __FUNCT__ "MatNorm_MPISBAIJ" 389 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm) 390 { 391 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 392 PetscErrorCode ierr; 393 PetscReal sum[2],*lnorm2; 394 395 PetscFunctionBegin; 396 if (baij->size == 1) { 397 ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr); 398 } else { 399 if (type == NORM_FROBENIUS) { 400 ierr = PetscMalloc1(2,&lnorm2);CHKERRQ(ierr); 401 ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr); 402 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++; /* square of norm(A) */ 403 ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr); 404 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--; /* square of norm(B) */ 405 ierr = MPI_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 406 *norm = PetscSqrtReal(sum[0] + 2*sum[1]); 407 ierr = PetscFree(lnorm2);CHKERRQ(ierr); 408 } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */ 409 Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data; 410 Mat_SeqBAIJ *bmat=(Mat_SeqBAIJ*)baij->B->data; 411 PetscReal *rsum,*rsum2,vabs; 412 PetscInt *jj,*garray=baij->garray,rstart=baij->rstartbs,nz; 413 PetscInt brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs; 414 MatScalar *v; 415 416 ierr = PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2);CHKERRQ(ierr); 417 ierr = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 418 /* Amat */ 419 v = amat->a; jj = amat->j; 420 for (brow=0; brow<mbs; brow++) { 421 grow = bs*(rstart + brow); 422 nz = amat->i[brow+1] - amat->i[brow]; 423 for (bcol=0; bcol<nz; bcol++) { 424 gcol = bs*(rstart + *jj); jj++; 425 for (col=0; col<bs; col++) { 426 for (row=0; row<bs; row++) { 427 vabs = PetscAbsScalar(*v); v++; 428 rsum[gcol+col] += vabs; 429 /* non-diagonal block */ 430 if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs; 431 } 432 } 433 } 434 } 435 /* Bmat */ 436 v = bmat->a; jj = bmat->j; 437 for (brow=0; brow<mbs; brow++) { 438 grow = bs*(rstart + brow); 439 nz = bmat->i[brow+1] - bmat->i[brow]; 440 for (bcol=0; bcol<nz; bcol++) { 441 gcol = bs*garray[*jj]; jj++; 442 for (col=0; col<bs; col++) { 443 for (row=0; row<bs; row++) { 444 vabs = PetscAbsScalar(*v); v++; 445 rsum[gcol+col] += vabs; 446 rsum[grow+row] += vabs; 447 } 448 } 449 } 450 } 451 ierr = 
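/*
   Note on the 1-/infinity-norm loops above (commentary added for clarity): only the upper
   triangle is stored, so every stored off-diagonal block contributes its absolute values
   twice, once to the column sums of its own block column and once to the sums of its block
   row, which accounts for the implicit transpose entry.  For a symmetric matrix the 1-norm
   and the infinity-norm coincide, so both are served by the same column sums.  A minimal
   usage sketch, with C and nrm illustrative names not taken from this file:

     PetscReal nrm;
     ierr = MatNorm(C,NORM_INFINITY,&nrm);CHKERRQ(ierr);
     ierr = MatNorm(C,NORM_FROBENIUS,&nrm);CHKERRQ(ierr);
*/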
MPI_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 452 *norm = 0.0; 453 for (col=0; col<mat->cmap->N; col++) { 454 if (rsum2[col] > *norm) *norm = rsum2[col]; 455 } 456 ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr); 457 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet"); 458 } 459 PetscFunctionReturn(0); 460 } 461 462 #undef __FUNCT__ 463 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ" 464 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode) 465 { 466 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 467 PetscErrorCode ierr; 468 PetscInt nstash,reallocs; 469 InsertMode addv; 470 471 PetscFunctionBegin; 472 if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 473 474 /* make sure all processors are either in INSERTMODE or ADDMODE */ 475 ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 476 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted, others added"); 477 mat->insertmode = addv; /* in case this processor had no cache */ 478 479 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 480 ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr); 481 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 482 ierr = PetscInfo2(mat,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 483 ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr); 484 ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 485 PetscFunctionReturn(0); 486 } 487 488 #undef __FUNCT__ 489 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ" 490 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode) 491 { 492 Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data; 493 Mat_SeqSBAIJ *a =(Mat_SeqSBAIJ*)baij->A->data; 494 PetscErrorCode ierr; 495 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 496 PetscInt *row,*col; 497 PetscBool other_disassembled; 498 PetscMPIInt n; 499 PetscBool r1,r2,r3; 500 MatScalar *val; 501 InsertMode addv = mat->insertmode; 502 503 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 504 PetscFunctionBegin; 505 if (!baij->donotstash && !mat->nooffprocentries) { 506 while (1) { 507 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 508 if (!flg) break; 509 510 for (i=0; i<n;) { 511 /* Now identify the consecutive vals belonging to the same row */ 512 for (j=i,rstart=row[j]; j<n; j++) { 513 if (row[j] != rstart) break; 514 } 515 if (j < n) ncols = j-i; 516 else ncols = n-i; 517 /* Now assemble all these values with a single function call */ 518 ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 519 i = j; 520 } 521 } 522 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 523 /* Now process the block-stash. 
Since the values are stashed column-oriented, 524 set the roworiented flag to column oriented, and after MatSetValues() 525 restore the original flags */ 526 r1 = baij->roworiented; 527 r2 = a->roworiented; 528 r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented; 529 530 baij->roworiented = PETSC_FALSE; 531 a->roworiented = PETSC_FALSE; 532 533 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */ 534 while (1) { 535 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 536 if (!flg) break; 537 538 for (i=0; i<n;) { 539 /* Now identify the consecutive vals belonging to the same row */ 540 for (j=i,rstart=row[j]; j<n; j++) { 541 if (row[j] != rstart) break; 542 } 543 if (j < n) ncols = j-i; 544 else ncols = n-i; 545 ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 546 i = j; 547 } 548 } 549 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 550 551 baij->roworiented = r1; 552 a->roworiented = r2; 553 554 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */ 555 } 556 557 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 558 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 559 560 /* determine if any processor has disassembled; if so we must 561 also disassemble ourselves, in order that we may reassemble. */ 562 /* 563 if the nonzero structure of submatrix B cannot change then we know that 564 no processor disassembled, thus we can skip this stuff 565 */ 566 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 567 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 568 if (mat->was_assembled && !other_disassembled) { 569 ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 570 } 571 } 572 573 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 574 ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */ 575 } 576 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 577 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 578 579 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 580 581 baij->rowvalues = 0; 582 583 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 584 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 585 PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate; 586 ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 587 } 588 PetscFunctionReturn(0); 589 } 590 591 extern PetscErrorCode MatView_SeqSBAIJ_ASCII(Mat,PetscViewer); 592 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 593 #include <petscdraw.h> 594 #undef __FUNCT__ 595 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket" 596 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 597 { 598 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 599 PetscErrorCode ierr; 600 PetscInt bs = mat->rmap->bs; 601 PetscMPIInt rank = baij->rank; 602 PetscBool iascii,isdraw; 603 PetscViewer sviewer; 604 PetscViewerFormat format; 605 606 PetscFunctionBegin; 607 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 608 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 609 if (iascii) { 610 ierr = 
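/*
   Usage sketch for this viewer path (illustrative, not part of this file): the detailed
   per-process breakdown handled below is typically requested with a command line option
   such as -mat_view ::ascii_info_detail, or programmatically with

     ierr = MatView(C,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);

   where C is an assembled MATMPISBAIJ matrix; the format pushed on the viewer decides
   which of the branches below is taken.
*/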
PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 611 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 612 MatInfo info; 613 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 614 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 615 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 616 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr); 617 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 618 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 619 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 620 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 621 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 622 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 623 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 624 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 625 PetscFunctionReturn(0); 626 } else if (format == PETSC_VIEWER_ASCII_INFO) { 627 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 628 PetscFunctionReturn(0); 629 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 630 PetscFunctionReturn(0); 631 } 632 } 633 634 if (isdraw) { 635 PetscDraw draw; 636 PetscBool isnull; 637 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 638 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 639 } 640 641 { 642 /* assemble the entire matrix onto first processor. */ 643 Mat A; 644 Mat_SeqSBAIJ *Aloc; 645 Mat_SeqBAIJ *Bloc; 646 PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 647 MatScalar *a; 648 const char *matname; 649 650 /* Should this be the same type as mat? 
*/ 651 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 652 if (!rank) { 653 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 654 } else { 655 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 656 } 657 ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 658 ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr); 659 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 660 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 661 662 /* copy over the A part */ 663 Aloc = (Mat_SeqSBAIJ*)baij->A->data; 664 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 665 ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr); 666 667 for (i=0; i<mbs; i++) { 668 rvals[0] = bs*(baij->rstartbs + i); 669 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 670 for (j=ai[i]; j<ai[i+1]; j++) { 671 col = (baij->cstartbs+aj[j])*bs; 672 for (k=0; k<bs; k++) { 673 ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 674 col++; 675 a += bs; 676 } 677 } 678 } 679 /* copy over the B part */ 680 Bloc = (Mat_SeqBAIJ*)baij->B->data; 681 ai = Bloc->i; aj = Bloc->j; a = Bloc->a; 682 for (i=0; i<mbs; i++) { 683 684 rvals[0] = bs*(baij->rstartbs + i); 685 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 686 for (j=ai[i]; j<ai[i+1]; j++) { 687 col = baij->garray[aj[j]]*bs; 688 for (k=0; k<bs; k++) { 689 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 690 col++; 691 a += bs; 692 } 693 } 694 } 695 ierr = PetscFree(rvals);CHKERRQ(ierr); 696 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 697 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 698 /* 699 Everyone has to call to draw the matrix since the graphics waits are 700 synchronized across all processors that share the PetscDraw object 701 */ 702 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 703 ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr); 704 if (!rank) { 705 ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,matname);CHKERRQ(ierr); 706 ierr = MatView_SeqSBAIJ_ASCII(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 707 } 708 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 709 ierr = MatDestroy(&A);CHKERRQ(ierr); 710 } 711 PetscFunctionReturn(0); 712 } 713 714 #undef __FUNCT__ 715 #define __FUNCT__ "MatView_MPISBAIJ" 716 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer) 717 { 718 PetscErrorCode ierr; 719 PetscBool iascii,isdraw,issocket,isbinary; 720 721 PetscFunctionBegin; 722 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 723 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 724 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 725 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 726 if (iascii || isdraw || issocket || isbinary) { 727 ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 728 } 729 PetscFunctionReturn(0); 730 } 731 732 #undef __FUNCT__ 733 #define __FUNCT__ "MatDestroy_MPISBAIJ" 734 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat) 735 { 736 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 737 PetscErrorCode ierr; 738 739 PetscFunctionBegin; 740 #if defined(PETSC_USE_LOG) 741 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N); 742 #endif 743 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 744 ierr = 
MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 745 ierr = MatDestroy(&baij->A);CHKERRQ(ierr); 746 ierr = MatDestroy(&baij->B);CHKERRQ(ierr); 747 #if defined(PETSC_USE_CTABLE) 748 ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr); 749 #else 750 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 751 #endif 752 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 753 ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr); 754 ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr); 755 ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr); 756 ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr); 757 ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr); 758 ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr); 759 ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr); 760 ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr); 761 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 762 ierr = PetscFree(baij->barray);CHKERRQ(ierr); 763 ierr = PetscFree(baij->hd);CHKERRQ(ierr); 764 ierr = VecDestroy(&baij->diag);CHKERRQ(ierr); 765 ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr); 766 ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr); 767 #if defined(PETSC_USE_REAL_MAT_SINGLE) 768 ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr); 769 #endif 770 ierr = PetscFree(baij->in_loc);CHKERRQ(ierr); 771 ierr = PetscFree(baij->v_loc);CHKERRQ(ierr); 772 ierr = PetscFree(baij->rangebs);CHKERRQ(ierr); 773 ierr = PetscFree(mat->data);CHKERRQ(ierr); 774 775 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 776 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 777 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 778 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr); 779 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 780 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpisbstrm_C",NULL);CHKERRQ(ierr); 781 #if defined(PETSC_HAVE_ELEMENTAL) 782 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_elemental_C",NULL);CHKERRQ(ierr); 783 #endif 784 PetscFunctionReturn(0); 785 } 786 787 #undef __FUNCT__ 788 #define __FUNCT__ "MatMult_MPISBAIJ_Hermitian" 789 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy) 790 { 791 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 792 PetscErrorCode ierr; 793 PetscInt nt,mbs=a->mbs,bs=A->rmap->bs; 794 PetscScalar *x,*from; 795 796 PetscFunctionBegin; 797 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 798 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 799 800 /* diagonal part */ 801 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 802 ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr); 803 804 /* subdiagonal part */ 805 ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 806 807 /* copy x into the vec slvec0 */ 808 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 809 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 810 811 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 812 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 813 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 814 815 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 816 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 817 /* superdiagonal part */ 818 ierr = 
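/*
   Commentary on the split multiply used here (added for clarity, not taken from PETSc
   documentation): with the locally stored block row written as [A B] and only the upper
   triangle kept, the product is assembled as

     slvec1a = A*x_local                    (local diagonal block, computed above)
     slvec0  = [ x_local | B^H*x_local ]    (packed send buffer)
     scatter slvec0 -> slvec1 with ADD_VALUES, which delivers the needed ghost values of
       x into slvec1b and adds the B^H*x_local terms (the implicit lower triangle) into
       the owning processes' slvec1a
     yy      = B*slvec1b + slvec1a          (the final multadd below)
*/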
(*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 819 PetscFunctionReturn(0); 820 } 821 822 #undef __FUNCT__ 823 #define __FUNCT__ "MatMult_MPISBAIJ" 824 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy) 825 { 826 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 827 PetscErrorCode ierr; 828 PetscInt nt,mbs=a->mbs,bs=A->rmap->bs; 829 PetscScalar *from; 830 const PetscScalar *x; 831 832 PetscFunctionBegin; 833 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 834 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 835 836 /* diagonal part */ 837 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 838 ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr); 839 840 /* subdiagonal part */ 841 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 842 843 /* copy x into the vec slvec0 */ 844 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 845 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 846 847 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 848 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 849 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 850 851 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 852 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 853 /* superdiagonal part */ 854 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 855 PetscFunctionReturn(0); 856 } 857 858 #undef __FUNCT__ 859 #define __FUNCT__ "MatMult_MPISBAIJ_2comm" 860 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy) 861 { 862 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 863 PetscErrorCode ierr; 864 PetscInt nt; 865 866 PetscFunctionBegin; 867 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 868 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 869 870 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 871 if (nt != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy"); 872 873 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 874 /* do diagonal part */ 875 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 876 /* do superdiagonal part */ 877 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 878 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 879 /* do subdiagonal part */ 880 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 881 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 882 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 883 PetscFunctionReturn(0); 884 } 885 886 #undef __FUNCT__ 887 #define __FUNCT__ "MatMultAdd_MPISBAIJ" 888 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 889 { 890 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 891 PetscErrorCode ierr; 892 PetscInt mbs=a->mbs,bs=A->rmap->bs; 893 PetscScalar *from,zero=0.0; 894 const PetscScalar *x; 895 896 PetscFunctionBegin; 897 /* 898 PetscSynchronizedPrintf(PetscObjectComm((PetscObject)A)," MatMultAdd is called ...\n"); 899 PetscSynchronizedFlush(PetscObjectComm((PetscObject)A),PETSC_STDOUT); 900 */ 901 /* diagonal part */ 902 ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr); 903 ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr); 904 905 /* subdiagonal part */ 906 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 907 908 /* copy x into 
the vec slvec0 */ 909 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 910 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 911 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 912 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 913 914 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 915 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 916 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 917 918 /* superdiagonal part */ 919 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr); 920 PetscFunctionReturn(0); 921 } 922 923 #undef __FUNCT__ 924 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm" 925 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz) 926 { 927 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 928 PetscErrorCode ierr; 929 930 PetscFunctionBegin; 931 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 932 /* do diagonal part */ 933 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 934 /* do superdiagonal part */ 935 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 936 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 937 938 /* do subdiagonal part */ 939 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 940 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 941 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 942 PetscFunctionReturn(0); 943 } 944 945 /* 946 This only works correctly for square matrices where the subblock A->A is the 947 diagonal block 948 */ 949 #undef __FUNCT__ 950 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ" 951 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v) 952 { 953 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 954 PetscErrorCode ierr; 955 956 PetscFunctionBegin; 957 /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */ 958 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 959 PetscFunctionReturn(0); 960 } 961 962 #undef __FUNCT__ 963 #define __FUNCT__ "MatScale_MPISBAIJ" 964 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa) 965 { 966 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 967 PetscErrorCode ierr; 968 969 PetscFunctionBegin; 970 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 971 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 972 PetscFunctionReturn(0); 973 } 974 975 #undef __FUNCT__ 976 #define __FUNCT__ "MatGetRow_MPISBAIJ" 977 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 978 { 979 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 980 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 981 PetscErrorCode ierr; 982 PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 983 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend; 984 PetscInt *cmap,*idx_p,cstart = mat->rstartbs; 985 986 PetscFunctionBegin; 987 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 988 mat->getrowactive = PETSC_TRUE; 989 990 if (!mat->rowvalues && (idx || v)) { 991 /* 992 allocate enough space to hold information from the longest row. 
993 */ 994 Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data; 995 Mat_SeqBAIJ *Ba = (Mat_SeqBAIJ*)mat->B->data; 996 PetscInt max = 1,mbs = mat->mbs,tmp; 997 for (i=0; i<mbs; i++) { 998 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */ 999 if (max < tmp) max = tmp; 1000 } 1001 ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr); 1002 } 1003 1004 if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows"); 1005 lrow = row - brstart; /* local row index */ 1006 1007 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1008 if (!v) {pvA = 0; pvB = 0;} 1009 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1010 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1011 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1012 nztot = nzA + nzB; 1013 1014 cmap = mat->garray; 1015 if (v || idx) { 1016 if (nztot) { 1017 /* Sort by increasing column numbers, assuming A and B already sorted */ 1018 PetscInt imark = -1; 1019 if (v) { 1020 *v = v_p = mat->rowvalues; 1021 for (i=0; i<nzB; i++) { 1022 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1023 else break; 1024 } 1025 imark = i; 1026 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1027 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1028 } 1029 if (idx) { 1030 *idx = idx_p = mat->rowindices; 1031 if (imark > -1) { 1032 for (i=0; i<imark; i++) { 1033 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1034 } 1035 } else { 1036 for (i=0; i<nzB; i++) { 1037 if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1038 else break; 1039 } 1040 imark = i; 1041 } 1042 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1043 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1044 } 1045 } else { 1046 if (idx) *idx = 0; 1047 if (v) *v = 0; 1048 } 1049 } 1050 *nz = nztot; 1051 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1052 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1053 PetscFunctionReturn(0); 1054 } 1055 1056 #undef __FUNCT__ 1057 #define __FUNCT__ "MatRestoreRow_MPISBAIJ" 1058 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1059 { 1060 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1061 1062 PetscFunctionBegin; 1063 if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1064 baij->getrowactive = PETSC_FALSE; 1065 PetscFunctionReturn(0); 1066 } 1067 1068 #undef __FUNCT__ 1069 #define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ" 1070 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A) 1071 { 1072 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1073 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1074 1075 PetscFunctionBegin; 1076 aA->getrow_utriangular = PETSC_TRUE; 1077 PetscFunctionReturn(0); 1078 } 1079 #undef __FUNCT__ 1080 #define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ" 1081 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A) 1082 { 1083 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1084 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1085 1086 PetscFunctionBegin; 1087 aA->getrow_utriangular = PETSC_FALSE; 1088 PetscFunctionReturn(0); 1089 } 1090 1091 #undef __FUNCT__ 1092 #define __FUNCT__ "MatRealPart_MPISBAIJ" 1093 PetscErrorCode MatRealPart_MPISBAIJ(Mat A) 1094 { 1095 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1096 PetscErrorCode ierr; 1097 1098 PetscFunctionBegin; 1099 ierr = 
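/*
   Usage sketch for the MatGetRow support implemented above (illustrative only; C and row
   are not names from this file).  For SBAIJ matrices the upper-triangular accessor must
   bracket the row queries:

     const PetscInt    *cols;
     const PetscScalar *vals;
     PetscInt          ncols;

     ierr = MatGetRowUpperTriangular(C);CHKERRQ(ierr);
     ierr = MatGetRow(C,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     ierr = MatRestoreRow(C,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     ierr = MatRestoreRowUpperTriangular(C);CHKERRQ(ierr);

   Only rows owned by the calling process may be requested, as enforced by
   MatGetRow_MPISBAIJ() above.
*/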
MatRealPart(a->A);CHKERRQ(ierr); 1100 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1101 PetscFunctionReturn(0); 1102 } 1103 1104 #undef __FUNCT__ 1105 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ" 1106 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A) 1107 { 1108 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1109 PetscErrorCode ierr; 1110 1111 PetscFunctionBegin; 1112 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1113 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1114 PetscFunctionReturn(0); 1115 } 1116 1117 /* Check if isrow is a subset of iscol_local, called by MatGetSubMatrix_MPISBAIJ() 1118 Input: isrow - distributed(parallel), 1119 iscol_local - locally owned (seq) 1120 */ 1121 #undef __FUNCT__ 1122 #define __FUNCT__ "ISEqual_private" 1123 PetscErrorCode ISEqual_private(IS isrow,IS iscol_local,PetscBool *flg) 1124 { 1125 PetscErrorCode ierr; 1126 PetscInt sz1,sz2,*a1,*a2,i,j,k,nmatch; 1127 const PetscInt *ptr1,*ptr2; 1128 1129 PetscFunctionBegin; 1130 ierr = ISGetLocalSize(isrow,&sz1);CHKERRQ(ierr); 1131 ierr = ISGetLocalSize(iscol_local,&sz2);CHKERRQ(ierr); 1132 if (sz1 > sz2) { 1133 *flg = PETSC_FALSE; 1134 PetscFunctionReturn(0); 1135 } 1136 1137 ierr = ISGetIndices(isrow,&ptr1);CHKERRQ(ierr); 1138 ierr = ISGetIndices(iscol_local,&ptr2);CHKERRQ(ierr); 1139 1140 ierr = PetscMalloc1(sz1,&a1);CHKERRQ(ierr); 1141 ierr = PetscMalloc1(sz2,&a2);CHKERRQ(ierr); 1142 ierr = PetscMemcpy(a1,ptr1,sz1*sizeof(PetscInt));CHKERRQ(ierr); 1143 ierr = PetscMemcpy(a2,ptr2,sz2*sizeof(PetscInt));CHKERRQ(ierr); 1144 ierr = PetscSortInt(sz1,a1);CHKERRQ(ierr); 1145 ierr = PetscSortInt(sz2,a2);CHKERRQ(ierr); 1146 1147 nmatch=0; 1148 k = 0; 1149 for (i=0; i<sz1; i++){ 1150 for (j=k; j<sz2; j++){ 1151 if (a1[i] == a2[j]) { 1152 k = j; nmatch++; 1153 break; 1154 } 1155 } 1156 } 1157 ierr = ISRestoreIndices(isrow,&ptr1);CHKERRQ(ierr); 1158 ierr = ISRestoreIndices(iscol_local,&ptr2);CHKERRQ(ierr); 1159 ierr = PetscFree(a1);CHKERRQ(ierr); 1160 ierr = PetscFree(a2);CHKERRQ(ierr); 1161 if (nmatch < sz1) { 1162 *flg = PETSC_FALSE; 1163 } else { 1164 *flg = PETSC_TRUE; 1165 } 1166 PetscFunctionReturn(0); 1167 } 1168 1169 #undef __FUNCT__ 1170 #define __FUNCT__ "MatGetSubMatrix_MPISBAIJ" 1171 PetscErrorCode MatGetSubMatrix_MPISBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 1172 { 1173 PetscErrorCode ierr; 1174 IS iscol_local; 1175 PetscInt csize; 1176 PetscBool isequal; 1177 1178 PetscFunctionBegin; 1179 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1180 if (call == MAT_REUSE_MATRIX) { 1181 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 1182 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 1183 } else { 1184 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 1185 ierr = ISEqual_private(isrow,iscol_local,&isequal);CHKERRQ(ierr); 1186 if (!isequal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isrow"); 1187 } 1188 1189 /* now call MatGetSubMatrix_MPIBAIJ() */ 1190 ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 1191 if (call == MAT_INITIAL_MATRIX) { 1192 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 1193 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 1194 } 1195 PetscFunctionReturn(0); 1196 } 1197 1198 #undef __FUNCT__ 1199 #define __FUNCT__ "MatZeroEntries_MPISBAIJ" 1200 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A) 1201 { 1202 Mat_MPISBAIJ *l = 
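/*
   Usage sketch for the submatrix extraction above (illustrative; C, sub, is, nlocal and
   first are not names from this file).  For the symmetric format the column index set
   must equal the row index set, otherwise MatGetSubMatrix_MPISBAIJ() raises an error:

     IS  is;
     Mat sub;

     ierr = ISCreateStride(PetscObjectComm((PetscObject)C),nlocal,first,1,&is);CHKERRQ(ierr);
     ierr = MatGetSubMatrix(C,is,is,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
     ierr = MatGetSubMatrix(C,is,is,MAT_REUSE_MATRIX,&sub);CHKERRQ(ierr);
     ierr = ISDestroy(&is);CHKERRQ(ierr);
*/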
(Mat_MPISBAIJ*)A->data; 1203 PetscErrorCode ierr; 1204 1205 PetscFunctionBegin; 1206 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1207 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1208 PetscFunctionReturn(0); 1209 } 1210 1211 #undef __FUNCT__ 1212 #define __FUNCT__ "MatGetInfo_MPISBAIJ" 1213 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1214 { 1215 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data; 1216 Mat A = a->A,B = a->B; 1217 PetscErrorCode ierr; 1218 PetscReal isend[5],irecv[5]; 1219 1220 PetscFunctionBegin; 1221 info->block_size = (PetscReal)matin->rmap->bs; 1222 1223 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1224 1225 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1226 isend[3] = info->memory; isend[4] = info->mallocs; 1227 1228 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1229 1230 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1231 isend[3] += info->memory; isend[4] += info->mallocs; 1232 if (flag == MAT_LOCAL) { 1233 info->nz_used = isend[0]; 1234 info->nz_allocated = isend[1]; 1235 info->nz_unneeded = isend[2]; 1236 info->memory = isend[3]; 1237 info->mallocs = isend[4]; 1238 } else if (flag == MAT_GLOBAL_MAX) { 1239 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1240 1241 info->nz_used = irecv[0]; 1242 info->nz_allocated = irecv[1]; 1243 info->nz_unneeded = irecv[2]; 1244 info->memory = irecv[3]; 1245 info->mallocs = irecv[4]; 1246 } else if (flag == MAT_GLOBAL_SUM) { 1247 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1248 1249 info->nz_used = irecv[0]; 1250 info->nz_allocated = irecv[1]; 1251 info->nz_unneeded = irecv[2]; 1252 info->memory = irecv[3]; 1253 info->mallocs = irecv[4]; 1254 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1255 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1256 info->fill_ratio_needed = 0; 1257 info->factor_mallocs = 0; 1258 PetscFunctionReturn(0); 1259 } 1260 1261 #undef __FUNCT__ 1262 #define __FUNCT__ "MatSetOption_MPISBAIJ" 1263 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg) 1264 { 1265 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1266 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1267 PetscErrorCode ierr; 1268 1269 PetscFunctionBegin; 1270 switch (op) { 1271 case MAT_NEW_NONZERO_LOCATIONS: 1272 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1273 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1274 case MAT_KEEP_NONZERO_PATTERN: 1275 case MAT_NEW_NONZERO_LOCATION_ERR: 1276 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1277 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1278 break; 1279 case MAT_ROW_ORIENTED: 1280 a->roworiented = flg; 1281 1282 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1283 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1284 break; 1285 case MAT_NEW_DIAGONALS: 1286 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1287 break; 1288 case MAT_IGNORE_OFF_PROC_ENTRIES: 1289 a->donotstash = flg; 1290 break; 1291 case MAT_USE_HASH_TABLE: 1292 a->ht_flag = flg; 1293 break; 1294 case MAT_HERMITIAN: 1295 if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first"); 1296 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1297 1298 A->ops->mult = MatMult_MPISBAIJ_Hermitian; 1299 break; 1300 case MAT_SPD: 1301 A->spd_set = PETSC_TRUE; 1302 A->spd = flg; 1303 if (flg) { 
1304 A->symmetric = PETSC_TRUE; 1305 A->structurally_symmetric = PETSC_TRUE; 1306 A->symmetric_set = PETSC_TRUE; 1307 A->structurally_symmetric_set = PETSC_TRUE; 1308 } 1309 break; 1310 case MAT_SYMMETRIC: 1311 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1312 break; 1313 case MAT_STRUCTURALLY_SYMMETRIC: 1314 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1315 break; 1316 case MAT_SYMMETRY_ETERNAL: 1317 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric"); 1318 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1319 break; 1320 case MAT_IGNORE_LOWER_TRIANGULAR: 1321 aA->ignore_ltriangular = flg; 1322 break; 1323 case MAT_ERROR_LOWER_TRIANGULAR: 1324 aA->ignore_ltriangular = flg; 1325 break; 1326 case MAT_GETROW_UPPERTRIANGULAR: 1327 aA->getrow_utriangular = flg; 1328 break; 1329 default: 1330 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1331 } 1332 PetscFunctionReturn(0); 1333 } 1334 1335 #undef __FUNCT__ 1336 #define __FUNCT__ "MatTranspose_MPISBAIJ" 1337 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B) 1338 { 1339 PetscErrorCode ierr; 1340 1341 PetscFunctionBegin; 1342 if (reuse == MAT_INITIAL_MATRIX || *B != A) { 1343 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 1344 } 1345 PetscFunctionReturn(0); 1346 } 1347 1348 #undef __FUNCT__ 1349 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ" 1350 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr) 1351 { 1352 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1353 Mat a = baij->A, b=baij->B; 1354 PetscErrorCode ierr; 1355 PetscInt nv,m,n; 1356 PetscBool flg; 1357 1358 PetscFunctionBegin; 1359 if (ll != rr) { 1360 ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr); 1361 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n"); 1362 } 1363 if (!ll) PetscFunctionReturn(0); 1364 1365 ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr); 1366 if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n); 1367 1368 ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr); 1369 if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size"); 1370 1371 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1372 1373 /* left diagonalscale the off-diagonal part */ 1374 ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr); 1375 1376 /* scale the diagonal part */ 1377 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1378 1379 /* right diagonalscale the off-diagonal part */ 1380 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1381 ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr); 1382 PetscFunctionReturn(0); 1383 } 1384 1385 #undef __FUNCT__ 1386 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ" 1387 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A) 1388 { 1389 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1390 PetscErrorCode ierr; 1391 1392 PetscFunctionBegin; 1393 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1394 PetscFunctionReturn(0); 1395 } 1396 1397 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*); 1398 1399 #undef __FUNCT__ 1400 #define __FUNCT__ "MatEqual_MPISBAIJ" 1401 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool *flag) 1402 { 1403 Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data; 1404 Mat a,b,c,d; 1405 PetscBool flg; 1406 PetscErrorCode ierr; 1407 1408 
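/*
   Usage note for MatDiagonalScale_MPISBAIJ() above (illustrative sketch; C and d are not
   names from this file): the symmetric format only supports symmetric scaling D*C*D, so
   the same vector must be passed on both sides,

     ierr = MatDiagonalScale(C,d,d);CHKERRQ(ierr);

   and passing two different vectors is rejected by the VecEqual() test above.
*/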
PetscFunctionBegin; 1409 a = matA->A; b = matA->B; 1410 c = matB->A; d = matB->B; 1411 1412 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1413 if (flg) { 1414 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1415 } 1416 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1417 PetscFunctionReturn(0); 1418 } 1419 1420 #undef __FUNCT__ 1421 #define __FUNCT__ "MatCopy_MPISBAIJ" 1422 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str) 1423 { 1424 PetscErrorCode ierr; 1425 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1426 Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)B->data; 1427 1428 PetscFunctionBegin; 1429 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1430 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1431 ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr); 1432 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1433 ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr); 1434 } else { 1435 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1436 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1437 } 1438 PetscFunctionReturn(0); 1439 } 1440 1441 #undef __FUNCT__ 1442 #define __FUNCT__ "MatSetUp_MPISBAIJ" 1443 PetscErrorCode MatSetUp_MPISBAIJ(Mat A) 1444 { 1445 PetscErrorCode ierr; 1446 1447 PetscFunctionBegin; 1448 ierr = MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1449 PetscFunctionReturn(0); 1450 } 1451 1452 #undef __FUNCT__ 1453 #define __FUNCT__ "MatAXPY_MPISBAIJ" 1454 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1455 { 1456 PetscErrorCode ierr; 1457 Mat_MPISBAIJ *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data; 1458 PetscBLASInt bnz,one=1; 1459 Mat_SeqSBAIJ *xa,*ya; 1460 Mat_SeqBAIJ *xb,*yb; 1461 1462 PetscFunctionBegin; 1463 if (str == SAME_NONZERO_PATTERN) { 1464 PetscScalar alpha = a; 1465 xa = (Mat_SeqSBAIJ*)xx->A->data; 1466 ya = (Mat_SeqSBAIJ*)yy->A->data; 1467 ierr = PetscBLASIntCast(xa->nz,&bnz);CHKERRQ(ierr); 1468 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one)); 1469 xb = (Mat_SeqBAIJ*)xx->B->data; 1470 yb = (Mat_SeqBAIJ*)yy->B->data; 1471 ierr = PetscBLASIntCast(xb->nz,&bnz);CHKERRQ(ierr); 1472 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one)); 1473 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 1474 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1475 ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 1476 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1477 ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr); 1478 } else { 1479 Mat B; 1480 PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs; 1481 if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size"); 1482 ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr); 1483 ierr = MatGetRowUpperTriangular(Y);CHKERRQ(ierr); 1484 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 1485 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 1486 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 1487 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 1488 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 1489 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 1490 ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr); 1491 ierr = MatAXPYGetPreallocation_SeqSBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 1492 ierr 
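/*
   Usage sketch for the MatAXPY branches above and below (illustrative; X, Y and alpha are
   not names from this file): Y <- alpha*X + Y, with the cheapest path selected by the
   pattern argument,

     ierr = MatAXPY(Y,alpha,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);        direct BLAS axpy on the stored values
     ierr = MatAXPY(Y,alpha,X,SUBSET_NONZERO_PATTERN);CHKERRQ(ierr);      row-by-row addition
     ierr = MatAXPY(Y,alpha,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);   Y is repreallocated

   In the DIFFERENT_NONZERO_PATTERN case the code gathers per-row counts for the diagonal
   and off-diagonal parts, builds a new matrix B with that preallocation and replaces the
   header of Y with it.
*/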
= MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 1493 ierr = MatMPISBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 1494 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 1495 ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr); 1496 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 1497 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 1498 ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr); 1499 ierr = MatRestoreRowUpperTriangular(Y);CHKERRQ(ierr); 1500 } 1501 PetscFunctionReturn(0); 1502 } 1503 1504 #undef __FUNCT__ 1505 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ" 1506 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[]) 1507 { 1508 PetscErrorCode ierr; 1509 PetscInt i; 1510 PetscBool flg,sorted; 1511 1512 PetscFunctionBegin; 1513 for (i = 0; i < n; i++) { 1514 ierr = ISSorted(irow[i],&sorted);CHKERRQ(ierr); 1515 if (!sorted) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row index set %d not sorted",i); 1516 ierr = ISSorted(icol[i],&sorted);CHKERRQ(ierr); 1517 if (!sorted) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Column index set %d not sorted",i); 1518 } 1519 ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr); 1520 for (i=0; i<n; i++) { 1521 ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr); 1522 if (!flg) { /* *B[i] is non-symmetric, set flag */ 1523 ierr = MatSetOption(*B[i],MAT_SYMMETRIC,PETSC_FALSE);CHKERRQ(ierr); 1524 } 1525 } 1526 PetscFunctionReturn(0); 1527 } 1528 1529 /* -------------------------------------------------------------------*/ 1530 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ, 1531 MatGetRow_MPISBAIJ, 1532 MatRestoreRow_MPISBAIJ, 1533 MatMult_MPISBAIJ, 1534 /* 4*/ MatMultAdd_MPISBAIJ, 1535 MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */ 1536 MatMultAdd_MPISBAIJ, 1537 0, 1538 0, 1539 0, 1540 /* 10*/ 0, 1541 0, 1542 0, 1543 MatSOR_MPISBAIJ, 1544 MatTranspose_MPISBAIJ, 1545 /* 15*/ MatGetInfo_MPISBAIJ, 1546 MatEqual_MPISBAIJ, 1547 MatGetDiagonal_MPISBAIJ, 1548 MatDiagonalScale_MPISBAIJ, 1549 MatNorm_MPISBAIJ, 1550 /* 20*/ MatAssemblyBegin_MPISBAIJ, 1551 MatAssemblyEnd_MPISBAIJ, 1552 MatSetOption_MPISBAIJ, 1553 MatZeroEntries_MPISBAIJ, 1554 /* 24*/ 0, 1555 0, 1556 0, 1557 0, 1558 0, 1559 /* 29*/ MatSetUp_MPISBAIJ, 1560 0, 1561 0, 1562 0, 1563 0, 1564 /* 34*/ MatDuplicate_MPISBAIJ, 1565 0, 1566 0, 1567 0, 1568 0, 1569 /* 39*/ MatAXPY_MPISBAIJ, 1570 MatGetSubMatrices_MPISBAIJ, 1571 MatIncreaseOverlap_MPISBAIJ, 1572 MatGetValues_MPISBAIJ, 1573 MatCopy_MPISBAIJ, 1574 /* 44*/ 0, 1575 MatScale_MPISBAIJ, 1576 0, 1577 0, 1578 0, 1579 /* 49*/ 0, 1580 0, 1581 0, 1582 0, 1583 0, 1584 /* 54*/ 0, 1585 0, 1586 MatSetUnfactored_MPISBAIJ, 1587 0, 1588 MatSetValuesBlocked_MPISBAIJ, 1589 /* 59*/ MatGetSubMatrix_MPISBAIJ, 1590 0, 1591 0, 1592 0, 1593 0, 1594 /* 64*/ 0, 1595 0, 1596 0, 1597 0, 1598 0, 1599 /* 69*/ MatGetRowMaxAbs_MPISBAIJ, 1600 0, 1601 0, 1602 0, 1603 0, 1604 /* 74*/ 0, 1605 0, 1606 0, 1607 0, 1608 0, 1609 /* 79*/ 0, 1610 0, 1611 0, 1612 0, 1613 MatLoad_MPISBAIJ, 1614 /* 84*/ 0, 1615 0, 1616 0, 1617 0, 1618 0, 1619 /* 89*/ 0, 1620 0, 1621 0, 1622 0, 1623 0, 1624 /* 94*/ 0, 1625 0, 1626 0, 1627 0, 1628 0, 1629 /* 99*/ 0, 1630 0, 1631 0, 1632 0, 1633 0, 1634 /*104*/ 0, 1635 MatRealPart_MPISBAIJ, 1636 MatImaginaryPart_MPISBAIJ, 1637 MatGetRowUpperTriangular_MPISBAIJ, 1638 MatRestoreRowUpperTriangular_MPISBAIJ, 1639 /*109*/ 0, 1640 0, 1641 0, 1642 0, 1643 0, 1644 /*114*/ 0, 1645 0, 1646 0, 1647 0, 1648 
0, 1649 /*119*/ 0, 1650 0, 1651 0, 1652 0, 1653 0, 1654 /*124*/ 0, 1655 0, 1656 0, 1657 0, 1658 0, 1659 /*129*/ 0, 1660 0, 1661 0, 1662 0, 1663 0, 1664 /*134*/ 0, 1665 0, 1666 0, 1667 0, 1668 0, 1669 /*139*/ 0, 1670 0, 1671 0, 1672 0, 1673 0, 1674 /*144*/MatCreateMPIMatConcatenateSeqMat_MPISBAIJ 1675 }; 1676 1677 #undef __FUNCT__ 1678 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ" 1679 PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a) 1680 { 1681 PetscFunctionBegin; 1682 *a = ((Mat_MPISBAIJ*)A->data)->A; 1683 PetscFunctionReturn(0); 1684 } 1685 1686 #undef __FUNCT__ 1687 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ" 1688 PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz) 1689 { 1690 Mat_MPISBAIJ *b; 1691 PetscErrorCode ierr; 1692 PetscInt i,mbs,Mbs; 1693 1694 PetscFunctionBegin; 1695 ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 1696 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 1697 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 1698 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 1699 1700 b = (Mat_MPISBAIJ*)B->data; 1701 mbs = B->rmap->n/bs; 1702 Mbs = B->rmap->N/bs; 1703 if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->N,bs); 1704 1705 B->rmap->bs = bs; 1706 b->bs2 = bs*bs; 1707 b->mbs = mbs; 1708 b->Mbs = Mbs; 1709 b->nbs = B->cmap->n/bs; 1710 b->Nbs = B->cmap->N/bs; 1711 1712 for (i=0; i<=b->size; i++) { 1713 b->rangebs[i] = B->rmap->range[i]/bs; 1714 } 1715 b->rstartbs = B->rmap->rstart/bs; 1716 b->rendbs = B->rmap->rend/bs; 1717 1718 b->cstartbs = B->cmap->rstart/bs; 1719 b->cendbs = B->cmap->rend/bs; 1720 1721 if (!B->preallocated) { 1722 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 1723 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 1724 ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr); 1725 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 1726 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 1727 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 1728 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 1729 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 1730 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr); 1731 } 1732 1733 ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 1734 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 1735 1736 B->preallocated = PETSC_TRUE; 1737 PetscFunctionReturn(0); 1738 } 1739 1740 #undef __FUNCT__ 1741 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR_MPISBAIJ" 1742 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 1743 { 1744 PetscInt m,rstart,cstart,cend; 1745 PetscInt i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0; 1746 const PetscInt *JJ =0; 1747 PetscScalar *values=0; 1748 PetscErrorCode ierr; 1749 1750 PetscFunctionBegin; 1751 if (bs < 1) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 1752 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 1753 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 1754 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 1755 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 1756 ierr = 
PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 1757 m = B->rmap->n/bs; 1758 rstart = B->rmap->rstart/bs; 1759 cstart = B->cmap->rstart/bs; 1760 cend = B->cmap->rend/bs; 1761 1762 if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 1763 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 1764 for (i=0; i<m; i++) { 1765 nz = ii[i+1] - ii[i]; 1766 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 1767 nz_max = PetscMax(nz_max,nz); 1768 JJ = jj + ii[i]; 1769 for (j=0; j<nz; j++) { 1770 if (*JJ >= cstart) break; 1771 JJ++; 1772 } 1773 d = 0; 1774 for (; j<nz; j++) { 1775 if (*JJ++ >= cend) break; 1776 d++; 1777 } 1778 d_nnz[i] = d; 1779 o_nnz[i] = nz - d; 1780 } 1781 ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 1782 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 1783 1784 values = (PetscScalar*)V; 1785 if (!values) { 1786 ierr = PetscMalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr); 1787 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 1788 } 1789 for (i=0; i<m; i++) { 1790 PetscInt row = i + rstart; 1791 PetscInt ncols = ii[i+1] - ii[i]; 1792 const PetscInt *icols = jj + ii[i]; 1793 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 1794 ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 1795 } 1796 1797 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 1798 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1799 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1800 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1801 PetscFunctionReturn(0); 1802 } 1803 1804 /*MC 1805 MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices, 1806 based on block compressed sparse row format. Only the upper triangular portion of the "diagonal" portion of 1807 the matrix is stored. 1808 1809 For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you 1810 can call MatSetOption(Mat, MAT_HERMITIAN); 1811 1812 Options Database Keys: 1813 . 
-mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions() 1814 1815 Level: beginner 1816 1817 .seealso: MatCreateMPISBAIJ 1818 M*/ 1819 1820 PETSC_EXTERN PetscErrorCode MatConvert_MPISBAIJ_MPISBSTRM(Mat,MatType,MatReuse,Mat*); 1821 1822 #undef __FUNCT__ 1823 #define __FUNCT__ "MatCreate_MPISBAIJ" 1824 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B) 1825 { 1826 Mat_MPISBAIJ *b; 1827 PetscErrorCode ierr; 1828 PetscBool flg = PETSC_FALSE; 1829 1830 PetscFunctionBegin; 1831 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 1832 B->data = (void*)b; 1833 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1834 1835 B->ops->destroy = MatDestroy_MPISBAIJ; 1836 B->ops->view = MatView_MPISBAIJ; 1837 B->assembled = PETSC_FALSE; 1838 B->insertmode = NOT_SET_VALUES; 1839 1840 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 1841 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr); 1842 1843 /* build local table of row and column ownerships */ 1844 ierr = PetscMalloc1(b->size+2,&b->rangebs);CHKERRQ(ierr); 1845 1846 /* build cache for off array entries formed */ 1847 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 1848 1849 b->donotstash = PETSC_FALSE; 1850 b->colmap = NULL; 1851 b->garray = NULL; 1852 b->roworiented = PETSC_TRUE; 1853 1854 /* stuff used in block assembly */ 1855 b->barray = 0; 1856 1857 /* stuff used for matrix vector multiply */ 1858 b->lvec = 0; 1859 b->Mvctx = 0; 1860 b->slvec0 = 0; 1861 b->slvec0b = 0; 1862 b->slvec1 = 0; 1863 b->slvec1a = 0; 1864 b->slvec1b = 0; 1865 b->sMvctx = 0; 1866 1867 /* stuff for MatGetRow() */ 1868 b->rowindices = 0; 1869 b->rowvalues = 0; 1870 b->getrowactive = PETSC_FALSE; 1871 1872 /* hash table stuff */ 1873 b->ht = 0; 1874 b->hd = 0; 1875 b->ht_size = 0; 1876 b->ht_flag = PETSC_FALSE; 1877 b->ht_fact = 0; 1878 b->ht_total_ct = 0; 1879 b->ht_insert_ct = 0; 1880 1881 /* stuff for MatGetSubMatrices_MPIBAIJ_local() */ 1882 b->ijonly = PETSC_FALSE; 1883 1884 b->in_loc = 0; 1885 b->v_loc = 0; 1886 b->n_loc = 0; 1887 1888 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ);CHKERRQ(ierr); 1889 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr); 1890 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr); 1891 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr); 1892 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr); 1893 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpisbstrm_C",MatConvert_MPISBAIJ_MPISBSTRM);CHKERRQ(ierr); 1894 #if defined(PETSC_HAVE_ELEMENTAL) 1895 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_elemental_C",MatConvert_MPISBAIJ_Elemental);CHKERRQ(ierr); 1896 #endif 1897 1898 B->symmetric = PETSC_TRUE; 1899 B->structurally_symmetric = PETSC_TRUE; 1900 B->symmetric_set = PETSC_TRUE; 1901 B->structurally_symmetric_set = PETSC_TRUE; 1902 1903 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr); 1904 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr); 1905 ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save 
memory in constructing matrix","MatSetOption",flg,&flg,NULL);CHKERRQ(ierr); 1906 if (flg) { 1907 PetscReal fact = 1.39; 1908 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 1909 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr); 1910 if (fact <= 1.0) fact = 1.39; 1911 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 1912 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 1913 } 1914 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1915 PetscFunctionReturn(0); 1916 } 1917 1918 /*MC 1919 MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices. 1920 1921 This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator, 1922 and MATMPISBAIJ otherwise. 1923 1924 Options Database Keys: 1925 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions() 1926 1927 Level: beginner 1928 1929 .seealso: MatCreateSBAIJ(),MATSEQSBAIJ,MATMPISBAIJ 1930 M*/ 1931 1932 #undef __FUNCT__ 1933 #define __FUNCT__ "MatMPISBAIJSetPreallocation" 1934 /*@C 1935 MatMPISBAIJSetPreallocation - For good matrix assembly performance 1936 the user should preallocate the matrix storage by setting the parameters 1937 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1938 performance can be increased by more than a factor of 50. 1939 1940 Collective on Mat 1941 1942 Input Parameters: 1943 + B - the matrix 1944 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 1945 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 1946 . d_nz - number of block nonzeros per block row in diagonal portion of local 1947 submatrix (same for all local rows) 1948 . d_nnz - array containing the number of block nonzeros in the various block rows 1949 in the upper triangular and diagonal part of the diagonal portion of the local 1950 submatrix (possibly different for each block row) or NULL. If you plan to factor the matrix you must leave room 1951 for the diagonal entry and set a value even if it is zero. 1952 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1953 submatrix (same for all local rows). 1954 - o_nnz - array containing the number of nonzeros in the various block rows of the 1955 off-diagonal portion of the local submatrix that is right of the diagonal 1956 (possibly different for each block row) or NULL. 1957 1958 1959 Options Database Keys: 1960 . -mat_no_unroll - uses code that does not unroll the loops in the 1961 block calculations (much slower) 1962 . -mat_block_size - size of the blocks to use 1963 1964 Notes: 1965 1966 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1967 then it must be used on all processors that share the object for that argument. 1968 1969 If the *_nnz parameter is given then the *_nz parameter is ignored. 1970 1971 Storage Information: 1972 For a square global matrix we define each processor's diagonal portion 1973 to be its local rows and the corresponding columns (a square submatrix); 1974 each processor's off-diagonal portion encompasses the remainder of the 1975 local matrix (a rectangular submatrix).
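   As a minimal calling sketch (the communicator comm, the local dimension m, and the counts used here are illustrative placeholders rather than values taken from this page), a block size 2 matrix expecting at most 3 upper-triangular/diagonal block nonzeros per block row in the diagonal portion and at most 1 block nonzero per block row in the off-diagonal portion could be preallocated as
.vb
   Mat B;
   MatCreate(comm,&B);
   MatSetSizes(B,m,m,PETSC_DETERMINE,PETSC_DETERMINE);
   MatSetType(B,MATMPISBAIJ);
   MatMPISBAIJSetPreallocation(B,2,3,NULL,1,NULL);
.ve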
1976 1977 The user can specify preallocated storage for the diagonal part of 1978 the local submatrix with either d_nz or d_nnz (not both). Set 1979 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 1980 memory allocation. Likewise, specify preallocated storage for the 1981 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1982 1983 You can call MatGetInfo() to get information on how effective the preallocation was; 1984 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 1985 You can also run with the option -info and look for messages with the string 1986 malloc in them to see if additional memory allocation was needed. 1987 1988 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1989 the figure below we depict these three local rows and all columns (0-11). 1990 1991 .vb 1992 0 1 2 3 4 5 6 7 8 9 10 11 1993 -------------------------- 1994 row 3 |. . . d d d o o o o o o 1995 row 4 |. . . d d d o o o o o o 1996 row 5 |. . . d d d o o o o o o 1997 -------------------------- 1998 .ve 1999 2000 Thus, any entries in the d locations are stored in the d (diagonal) 2001 submatrix, and any entries in the o locations are stored in the 2002 o (off-diagonal) submatrix. Note that the d matrix is stored in 2003 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 2004 2005 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 2006 plus the diagonal part of the d matrix, 2007 and o_nz should indicate the number of block nonzeros per row in the o matrix 2008 2009 In general, for PDE problems in which most nonzeros are near the diagonal, 2010 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2011 or you will get TERRIBLE performance; see the users' manual chapter on 2012 matrices. 2013 2014 Level: intermediate 2015 2016 .keywords: matrix, block, aij, compressed row, sparse, parallel 2017 2018 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership() 2019 @*/ 2020 PetscErrorCode MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2021 { 2022 PetscErrorCode ierr; 2023 2024 PetscFunctionBegin; 2025 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2026 PetscValidType(B,1); 2027 PetscValidLogicalCollectiveInt(B,bs,2); 2028 ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 2029 PetscFunctionReturn(0); 2030 } 2031 2032 #undef __FUNCT__ 2033 #define __FUNCT__ "MatCreateSBAIJ" 2034 /*@C 2035 MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format 2036 (block compressed row). For good matrix assembly performance 2037 the user should preallocate the matrix storage by setting the parameters 2038 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2039 performance can be increased by more than a factor of 50. 2040 2041 Collective on MPI_Comm 2042 2043 Input Parameters: 2044 + comm - MPI communicator 2045 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 2046 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 2047 . 
m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2048 This value should be the same as the local size used in creating the 2049 y vector for the matrix-vector product y = Ax. 2050 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 2051 This value should be the same as the local size used in creating the 2052 x vector for the matrix-vector product y = Ax. 2053 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2054 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2055 . d_nz - number of block nonzeros per block row in diagonal portion of local 2056 submatrix (same for all local rows) 2057 . d_nnz - array containing the number of block nonzeros in the various block rows 2058 in the upper triangular portion of the diagonal portion of the local 2059 submatrix (possibly different for each block row) or NULL. 2060 If you plan to factor the matrix you must leave room for the diagonal entry and 2061 set its value even if it is zero. 2062 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2063 submatrix (same for all local rows). 2064 - o_nnz - array containing the number of nonzeros in the various block rows of the 2065 off-diagonal portion of the local submatrix (possibly different for 2066 each block row) or NULL. 2067 2068 Output Parameter: 2069 . A - the matrix 2070 2071 Options Database Keys: 2072 . -mat_no_unroll - uses code that does not unroll the loops in the 2073 block calculations (much slower) 2074 . -mat_block_size - size of the blocks to use 2075 . -mat_mpi - use the parallel matrix data structures even on one processor 2076 (defaults to using SeqBAIJ format on one processor) 2077 2078 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 2079 MatXXXXSetPreallocation() paradigm instead of this routine directly. 2080 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 2081 2082 Notes: 2083 The number of rows and columns must be divisible by blocksize. 2084 This matrix type does not support complex Hermitian operations. 2085 2086 The user MUST specify either the local or global matrix dimensions 2087 (possibly both). 2088 2089 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2090 then it must be used on all processors that share the object for that argument. 2091 2092 If the *_nnz parameter is given then the *_nz parameter is ignored. 2093 2094 Storage Information: 2095 For a square global matrix we define each processor's diagonal portion 2096 to be its local rows and the corresponding columns (a square submatrix); 2097 each processor's off-diagonal portion encompasses the remainder of the 2098 local matrix (a rectangular submatrix). 2099 2100 The user can specify preallocated storage for the diagonal part of 2101 the local submatrix with either d_nz or d_nnz (not both). Set 2102 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 2103 memory allocation. Likewise, specify preallocated storage for the 2104 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2105 2106 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2107 the figure below we depict these three local rows and all columns (0-11). 2108 2109 .vb 2110 0 1 2 3 4 5 6 7 8 9 10 11 2111 -------------------------- 2112 row 3 |. . . d d d o o o o o o 2113 row 4 |. . . d d d o o o o o o 2114 row 5 |. . . 
d d d o o o o o o 2115 -------------------------- 2116 .ve 2117 2118 Thus, any entries in the d locations are stored in the d (diagonal) 2119 submatrix, and any entries in the o locations are stored in the 2120 o (off-diagonal) submatrix. Note that the d matrix is stored in 2121 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 2122 2123 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 2124 plus the diagonal part of the d matrix, 2125 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2126 In general, for PDE problems in which most nonzeros are near the diagonal, 2127 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2128 or you will get TERRIBLE performance; see the users' manual chapter on 2129 matrices. 2130 2131 Level: intermediate 2132 2133 .keywords: matrix, block, aij, compressed row, sparse, parallel 2134 2135 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ() 2136 @*/ 2137 2138 PetscErrorCode MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2139 { 2140 PetscErrorCode ierr; 2141 PetscMPIInt size; 2142 2143 PetscFunctionBegin; 2144 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2145 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2146 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2147 if (size > 1) { 2148 ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr); 2149 ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2150 } else { 2151 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 2152 ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2153 } 2154 PetscFunctionReturn(0); 2155 } 2156 2157 2158 #undef __FUNCT__ 2159 #define __FUNCT__ "MatDuplicate_MPISBAIJ" 2160 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2161 { 2162 Mat mat; 2163 Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data; 2164 PetscErrorCode ierr; 2165 PetscInt len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs; 2166 PetscScalar *array; 2167 2168 PetscFunctionBegin; 2169 *newmat = 0; 2170 2171 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2172 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2173 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2174 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2175 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2176 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2177 2178 mat->factortype = matin->factortype; 2179 mat->preallocated = PETSC_TRUE; 2180 mat->assembled = PETSC_TRUE; 2181 mat->insertmode = NOT_SET_VALUES; 2182 2183 a = (Mat_MPISBAIJ*)mat->data; 2184 a->bs2 = oldmat->bs2; 2185 a->mbs = oldmat->mbs; 2186 a->nbs = oldmat->nbs; 2187 a->Mbs = oldmat->Mbs; 2188 a->Nbs = oldmat->Nbs; 2189 2190 2191 a->size = oldmat->size; 2192 a->rank = oldmat->rank; 2193 a->donotstash = oldmat->donotstash; 2194 a->roworiented = oldmat->roworiented; 2195 a->rowindices = 0; 2196 a->rowvalues = 0; 2197 a->getrowactive = PETSC_FALSE; 2198 a->barray = 0; 2199 a->rstartbs = oldmat->rstartbs; 2200 a->rendbs = oldmat->rendbs; 2201 a->cstartbs = oldmat->cstartbs; 2202 a->cendbs = oldmat->cendbs; 2203 2204 /* hash table stuff */ 2205 a->ht = 0; 2206 a->hd = 0; 2207 a->ht_size = 0; 2208 a->ht_flag = oldmat->ht_flag; 2209 a->ht_fact 
= oldmat->ht_fact; 2210 a->ht_total_ct = 0; 2211 a->ht_insert_ct = 0; 2212 2213 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr); 2214 if (oldmat->colmap) { 2215 #if defined(PETSC_USE_CTABLE) 2216 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2217 #else 2218 ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr); 2219 ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2220 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2221 #endif 2222 } else a->colmap = 0; 2223 2224 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2225 ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr); 2226 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2227 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 2228 } else a->garray = 0; 2229 2230 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 2231 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2232 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2233 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2234 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2235 2236 ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr); 2237 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr); 2238 ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr); 2239 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr); 2240 2241 ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr); 2242 ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr); 2243 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr); 2244 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr); 2245 ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr); 2246 ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr); 2247 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr); 2248 ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr); 2249 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr); 2250 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr); 2251 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b);CHKERRQ(ierr); 2252 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a);CHKERRQ(ierr); 2253 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b);CHKERRQ(ierr); 2254 2255 /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */ 2256 ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr); 2257 a->sMvctx = oldmat->sMvctx; 2258 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx);CHKERRQ(ierr); 2259 2260 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2261 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2262 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2263 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2264 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2265 *newmat = mat; 2266 PetscFunctionReturn(0); 2267 } 2268 2269 #undef __FUNCT__ 2270 #define 
__FUNCT__ "MatLoad_MPISBAIJ" 2271 PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer) 2272 { 2273 PetscErrorCode ierr; 2274 PetscInt i,nz,j,rstart,rend; 2275 PetscScalar *vals,*buf; 2276 MPI_Comm comm; 2277 MPI_Status status; 2278 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,mmbs; 2279 PetscInt header[4],*rowlengths = 0,M,N,m,*cols,*locrowlens; 2280 PetscInt *procsnz = 0,jj,*mycols,*ibuf; 2281 PetscInt bs = newmat->rmap->bs,Mbs,mbs,extra_rows; 2282 PetscInt *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2283 PetscInt dcount,kmax,k,nzcount,tmp,sizesset=1,grows,gcols; 2284 int fd; 2285 2286 PetscFunctionBegin; 2287 /* force binary viewer to load .info file if it has not yet done so */ 2288 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2289 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 2290 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr); 2291 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 2292 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2293 if (bs < 0) bs = 1; 2294 2295 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2296 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2297 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2298 if (!rank) { 2299 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 2300 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2301 if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ"); 2302 } 2303 2304 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0; 2305 2306 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2307 M = header[1]; 2308 N = header[2]; 2309 2310 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 2311 if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M; 2312 if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N; 2313 2314 /* If global sizes are set, check if they are consistent with that given in the file */ 2315 if (sizesset) { 2316 ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr); 2317 } 2318 if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 2319 if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 2320 2321 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 2322 2323 /* 2324 This code adds extra rows to make sure the number of rows is 2325 divisible by the blocksize 2326 */ 2327 Mbs = M/bs; 2328 extra_rows = bs - M + bs*(Mbs); 2329 if (extra_rows == bs) extra_rows = 0; 2330 else Mbs++; 2331 if (extra_rows &&!rank) { 2332 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 2333 } 2334 2335 /* determine ownership of all rows */ 2336 if (newmat->rmap->n < 0) { /* PETSC_DECIDE */ 2337 mbs = Mbs/size + ((Mbs % size) > rank); 2338 m = mbs*bs; 2339 } else { /* User Set */ 2340 m = newmat->rmap->n; 2341 mbs = m/bs; 2342 } 2343 ierr = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr); 2344 ierr = PetscMPIIntCast(mbs,&mmbs);CHKERRQ(ierr); 2345 
ierr = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2346 rowners[0] = 0; 2347 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2348 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2349 rstart = rowners[rank]; 2350 rend = rowners[rank+1]; 2351 2352 /* distribute row lengths to all processors */ 2353 ierr = PetscMalloc1((rend-rstart)*bs,&locrowlens);CHKERRQ(ierr); 2354 if (!rank) { 2355 ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr); 2356 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2357 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2358 ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr); 2359 for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2360 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2361 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2362 } else { 2363 ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2364 } 2365 2366 if (!rank) { /* procs[0] */ 2367 /* calculate the number of nonzeros on each processor */ 2368 ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr); 2369 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2370 for (i=0; i<size; i++) { 2371 for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 2372 procsnz[i] += rowlengths[j]; 2373 } 2374 } 2375 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2376 2377 /* determine max buffer needed and allocate it */ 2378 maxnz = 0; 2379 for (i=0; i<size; i++) { 2380 maxnz = PetscMax(maxnz,procsnz[i]); 2381 } 2382 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 2383 2384 /* read in my part of the matrix column indices */ 2385 nz = procsnz[0]; 2386 ierr = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr); 2387 mycols = ibuf; 2388 if (size == 1) nz -= extra_rows; 2389 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2390 if (size == 1) { 2391 for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i; 2392 } 2393 2394 /* read in every ones (except the last) and ship off */ 2395 for (i=1; i<size-1; i++) { 2396 nz = procsnz[i]; 2397 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2398 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2399 } 2400 /* read in the stuff for the last proc */ 2401 if (size != 1) { 2402 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2403 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2404 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2405 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 2406 } 2407 ierr = PetscFree(cols);CHKERRQ(ierr); 2408 } else { /* procs[i], i>0 */ 2409 /* determine buffer space needed for message */ 2410 nz = 0; 2411 for (i=0; i<m; i++) nz += locrowlens[i]; 2412 ierr = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr); 2413 mycols = ibuf; 2414 /* receive message of column indices*/ 2415 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2416 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2417 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2418 } 2419 2420 /* loop over local rows, determining number of off diagonal entries */ 2421 ierr = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr); 2422 ierr = PetscMalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr); 2423 ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2424 ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2425 ierr = 
PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2426 rowcount = 0; 2427 nzcount = 0; 2428 for (i=0; i<mbs; i++) { 2429 dcount = 0; 2430 odcount = 0; 2431 for (j=0; j<bs; j++) { 2432 kmax = locrowlens[rowcount]; 2433 for (k=0; k<kmax; k++) { 2434 tmp = mycols[nzcount++]/bs; /* block col. index */ 2435 if (!mask[tmp]) { 2436 mask[tmp] = 1; 2437 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */ 2438 else masked1[dcount++] = tmp; /* entry in diag portion */ 2439 } 2440 } 2441 rowcount++; 2442 } 2443 2444 dlens[i] = dcount; /* d_nzz[i] */ 2445 odlens[i] = odcount; /* o_nzz[i] */ 2446 2447 /* zero out the mask elements we set */ 2448 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2449 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2450 } 2451 if (!sizesset) { 2452 ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 2453 } 2454 ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr); 2455 ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 2456 2457 if (!rank) { 2458 ierr = PetscMalloc1(maxnz,&buf);CHKERRQ(ierr); 2459 /* read in my part of the matrix numerical values */ 2460 nz = procsnz[0]; 2461 vals = buf; 2462 mycols = ibuf; 2463 if (size == 1) nz -= extra_rows; 2464 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2465 if (size == 1) { 2466 for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0; 2467 } 2468 2469 /* insert into matrix */ 2470 jj = rstart*bs; 2471 for (i=0; i<m; i++) { 2472 ierr = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2473 mycols += locrowlens[i]; 2474 vals += locrowlens[i]; 2475 jj++; 2476 } 2477 2478 /* read in other processors (except the last one) and ship out */ 2479 for (i=1; i<size-1; i++) { 2480 nz = procsnz[i]; 2481 vals = buf; 2482 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2483 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 2484 } 2485 /* the last proc */ 2486 if (size != 1) { 2487 nz = procsnz[i] - extra_rows; 2488 vals = buf; 2489 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2490 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2491 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 2492 } 2493 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2494 2495 } else { 2496 /* receive numeric values */ 2497 ierr = PetscMalloc1(nz,&buf);CHKERRQ(ierr); 2498 2499 /* receive message of values*/ 2500 vals = buf; 2501 mycols = ibuf; 2502 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr); 2503 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2504 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2505 2506 /* insert into matrix */ 2507 jj = rstart*bs; 2508 for (i=0; i<m; i++) { 2509 ierr = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2510 mycols += locrowlens[i]; 2511 vals += locrowlens[i]; 2512 jj++; 2513 } 2514 } 2515 2516 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2517 ierr = PetscFree(buf);CHKERRQ(ierr); 2518 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2519 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 2520 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 2521 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 2522 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2523 ierr = 
MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2524 PetscFunctionReturn(0); 2525 } 2526 2527 #undef __FUNCT__ 2528 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor" 2529 /*XXXXX@ 2530 MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2531 2532 Input Parameters: 2533 . mat - the matrix 2534 . fact - factor 2535 2536 Not Collective on Mat, each process can have a different hash factor 2537 2538 Level: advanced 2539 2540 Notes: 2541 This can also be set by the command line option: -mat_use_hash_table fact 2542 2543 .keywords: matrix, hashtable, factor, HT 2544 2545 .seealso: MatSetOption() 2546 @XXXXX*/ 2547 2548 2549 #undef __FUNCT__ 2550 #define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ" 2551 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[]) 2552 { 2553 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 2554 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(a->B)->data; 2555 PetscReal atmp; 2556 PetscReal *work,*svalues,*rvalues; 2557 PetscErrorCode ierr; 2558 PetscInt i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol; 2559 PetscMPIInt rank,size; 2560 PetscInt *rowners_bs,dest,count,source; 2561 PetscScalar *va; 2562 MatScalar *ba; 2563 MPI_Status stat; 2564 2565 PetscFunctionBegin; 2566 if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov"); 2567 ierr = MatGetRowMaxAbs(a->A,v,NULL);CHKERRQ(ierr); 2568 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2569 2570 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 2571 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 2572 2573 bs = A->rmap->bs; 2574 mbs = a->mbs; 2575 Mbs = a->Mbs; 2576 ba = b->a; 2577 bi = b->i; 2578 bj = b->j; 2579 2580 /* find ownerships */ 2581 rowners_bs = A->rmap->range; 2582 2583 /* each proc creates an array to be distributed */ 2584 ierr = PetscMalloc1(bs*Mbs,&work);CHKERRQ(ierr); 2585 ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr); 2586 2587 /* row_max for B */ 2588 if (rank != size-1) { 2589 for (i=0; i<mbs; i++) { 2590 ncols = bi[1] - bi[0]; bi++; 2591 brow = bs*i; 2592 for (j=0; j<ncols; j++) { 2593 bcol = bs*(*bj); 2594 for (kcol=0; kcol<bs; kcol++) { 2595 col = bcol + kcol; /* local col index */ 2596 col += rowners_bs[rank+1]; /* global col index */ 2597 for (krow=0; krow<bs; krow++) { 2598 atmp = PetscAbsScalar(*ba); ba++; 2599 row = brow + krow; /* local row index */ 2600 if (PetscRealPart(va[row]) < atmp) va[row] = atmp; 2601 if (work[col] < atmp) work[col] = atmp; 2602 } 2603 } 2604 bj++; 2605 } 2606 } 2607 2608 /* send values to its owners */ 2609 for (dest=rank+1; dest<size; dest++) { 2610 svalues = work + rowners_bs[dest]; 2611 count = rowners_bs[dest+1]-rowners_bs[dest]; 2612 ierr = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2613 } 2614 } 2615 2616 /* receive values */ 2617 if (rank) { 2618 rvalues = work; 2619 count = rowners_bs[rank+1]-rowners_bs[rank]; 2620 for (source=0; source<rank; source++) { 2621 ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat);CHKERRQ(ierr); 2622 /* process values */ 2623 for (i=0; i<count; i++) { 2624 if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i]; 2625 } 2626 } 2627 } 2628 2629 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2630 ierr = PetscFree(work);CHKERRQ(ierr); 2631 PetscFunctionReturn(0); 2632 } 2633 2634 #undef __FUNCT__ 2635 #define __FUNCT__ "MatSOR_MPISBAIJ" 2636 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec 
bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2637 { 2638 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2639 PetscErrorCode ierr; 2640 PetscInt mbs=mat->mbs,bs=matin->rmap->bs; 2641 PetscScalar *x,*ptr,*from; 2642 Vec bb1; 2643 const PetscScalar *b; 2644 2645 PetscFunctionBegin; 2646 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 2647 if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2648 2649 if (flag == SOR_APPLY_UPPER) { 2650 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2651 PetscFunctionReturn(0); 2652 } 2653 2654 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2655 if (flag & SOR_ZERO_INITIAL_GUESS) { 2656 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2657 its--; 2658 } 2659 2660 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2661 while (its--) { 2662 2663 /* lower triangular part: slvec0b = - B^T*xx */ 2664 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2665 2666 /* copy xx into slvec0a */ 2667 ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2668 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2669 ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2670 ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2671 2672 ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr); 2673 2674 /* copy bb into slvec1a */ 2675 ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2676 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 2677 ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2678 ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2679 2680 /* set slvec1b = 0 */ 2681 ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr); 2682 2683 ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2684 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2685 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 2686 ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2687 2688 /* upper triangular part: bb1 = bb1 - B*x */ 2689 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr); 2690 2691 /* local diagonal sweep */ 2692 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2693 } 2694 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 2695 } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 2696 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2697 } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 2698 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2699 } else if (flag & SOR_EISENSTAT) { 2700 Vec xx1; 2701 PetscBool hasop; 2702 const PetscScalar *diag; 2703 PetscScalar *sl,scale = (omega - 2.0)/omega; 2704 PetscInt i,n; 2705 2706 if (!mat->xx1) { 2707 ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr); 2708 ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr); 2709 } 2710 xx1 = mat->xx1; 2711 bb1 = mat->bb1; 2712 2713 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 2714 2715 if (!mat->diag) { 2716 /* this is wrong for same matrix with new nonzero values */ 2717 ierr = 
MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 2718 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 2719 } 2720 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 2721 2722 if (hasop) { 2723 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 2724 ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr); 2725 } else { 2726 /* 2727 These two lines are replaced by code that may be a bit faster for a good compiler 2728 ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr); 2729 ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr); 2730 */ 2731 ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr); 2732 ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr); 2733 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 2734 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2735 ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr); 2736 if (omega == 1.0) { 2737 for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i]; 2738 ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr); 2739 } else { 2740 for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i]; 2741 ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr); 2742 } 2743 ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr); 2744 ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr); 2745 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 2746 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2747 } 2748 2749 /* multiply off-diagonal portion of matrix */ 2750 ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr); 2751 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2752 ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr); 2753 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2754 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2755 ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr); 2756 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2757 ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2758 ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2759 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr); 2760 2761 /* local sweep */ 2762 ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 2763 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 2764 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2765 PetscFunctionReturn(0); 2766 } 2767 2768 #undef __FUNCT__ 2769 #define __FUNCT__ "MatSOR_MPISBAIJ_2comm" 2770 PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2771 { 2772 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2773 PetscErrorCode ierr; 2774 Vec lvec1,bb1; 2775 2776 PetscFunctionBegin; 2777 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 2778 if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2779 2780 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2781 if (flag & SOR_ZERO_INITIAL_GUESS) { 2782 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2783 its--; 2784 } 2785 2786 ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr); 2787 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2788 while (its--) { 2789 ierr = 
VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2790 2791 /* lower diagonal part: bb1 = bb - B^T*xx */ 2792 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr); 2793 ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr); 2794 2795 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2796 ierr = VecCopy(bb,bb1);CHKERRQ(ierr); 2797 ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2798 2799 /* upper diagonal part: bb1 = bb1 - B*x */ 2800 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2801 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 2802 2803 ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2804 2805 /* diagonal sweep */ 2806 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2807 } 2808 ierr = VecDestroy(&lvec1);CHKERRQ(ierr); 2809 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 2810 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2811 PetscFunctionReturn(0); 2812 } 2813 2814 #undef __FUNCT__ 2815 #define __FUNCT__ "MatCreateMPISBAIJWithArrays" 2816 /*@ 2817 MatCreateMPISBAIJWithArrays - creates a MPI SBAIJ matrix using arrays that contain in standard 2818 CSR format the local rows. 2819 2820 Collective on MPI_Comm 2821 2822 Input Parameters: 2823 + comm - MPI communicator 2824 . bs - the block size, only a block size of 1 is supported 2825 . m - number of local rows (Cannot be PETSC_DECIDE) 2826 . n - This value should be the same as the local size used in creating the 2827 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 2828 calculated if N is given) For square matrices n is almost always m. 2829 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2830 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2831 . i - row indices 2832 . j - column indices 2833 - a - matrix values 2834 2835 Output Parameter: 2836 . mat - the matrix 2837 2838 Level: intermediate 2839 2840 Notes: 2841 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 2842 thus you CANNOT change the matrix entries by changing the values of a[] after you have 2843 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 2844 2845 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 
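   As an illustrative sketch (the communicator comm and the output Mat mat are assumed to be declared by the caller; the numbers are made up, and the remaining rows are assumed to be supplied by other processes), a process that owns global rows 3 and 4 of a matrix with bs = 1 and wants to set the entries (3,3), (3,5) and (4,4) would pass
.vb
   PetscInt    i[] = {0,2,3};
   PetscInt    j[] = {3,5,4};
   PetscScalar a[] = {1.0,2.0,3.0};
   MatCreateMPISBAIJWithArrays(comm,1,2,2,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,&mat);
.ve
   Only entries on or above the diagonal are given in this sketch, matching the symmetric storage of this matrix type.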
2846 2847 .keywords: matrix, aij, compressed row, sparse, parallel 2848 2849 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 2850 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 2851 @*/ 2852 PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 2853 { 2854 PetscErrorCode ierr; 2855 2856 2857 PetscFunctionBegin; 2858 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 2859 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 2860 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 2861 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 2862 ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr); 2863 ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr); 2864 PetscFunctionReturn(0); 2865 } 2866 2867 2868 #undef __FUNCT__ 2869 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR" 2870 /*@C 2871 MatMPISBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in SBAIJ format 2872 (symmetric block compressed row). 2873 2874 Collective on MPI_Comm 2875 2876 Input Parameters: 2877 + B - the matrix 2878 . bs - the block size 2879 . i - the indices into j for the start of each local row (starts with zero) 2880 . j - the column indices for each local row (starts with zero); these must be sorted for each row 2881 - v - optional values in the matrix 2882 2883 Level: developer 2884 2885 .keywords: matrix, aij, compressed row, sparse, parallel 2886 2887 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ 2888 @*/ 2889 PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2890 { 2891 PetscErrorCode ierr; 2892 2893 PetscFunctionBegin; 2894 ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 2895 PetscFunctionReturn(0); 2896 } 2897 2898 #undef __FUNCT__ 2899 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPISBAIJ" 2900 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 2901 { 2902 PetscErrorCode ierr; 2903 PetscInt m,N,i,rstart,nnz,Ii,bs,cbs; 2904 PetscInt *indx; 2905 PetscScalar *values; 2906 2907 PetscFunctionBegin; 2908 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 2909 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 2910 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inmat->data; 2911 PetscInt *dnz,*onz,sum,bs,cbs,mbs,Nbs; 2912 PetscInt *bindx,rmax=a->rmax,j; 2913 2914 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 2915 mbs = m/bs; Nbs = N/cbs; 2916 if (n == PETSC_DECIDE) { 2917 ierr = PetscSplitOwnership(comm,&n,&Nbs);CHKERRQ(ierr); 2918 } 2919 /* Check sum(n) = Nbs */ 2920 ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2921 if (sum != Nbs) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",Nbs); 2922 2923 ierr = MPI_Scan(&mbs, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2924 rstart -= mbs; 2925 2926 ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr); 2927 ierr = MatPreallocateInitialize(comm,mbs,n,dnz,onz);CHKERRQ(ierr); 2928 ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
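/* For each local block row of the sequential SBAIJ matrix, the loop below fetches the stored (upper triangular) row in point form, converts the point column indices to block column indices, and hands them to MatPreallocateSet() so dnz/onz count the diagonal and off-diagonal block nonzeros of the parallel matrix */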
2929 for (i=0; i<mbs; i++) { 2930 ierr = MatGetRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */ 2931 nnz = nnz/bs; 2932 for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs; 2933 ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr); 2934 ierr = MatRestoreRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); 2935 } 2936 ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr); 2937 ierr = PetscFree(bindx);CHKERRQ(ierr); 2938 2939 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 2940 ierr = MatSetSizes(*outmat,m,n*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2941 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 2942 ierr = MatSetType(*outmat,MATMPISBAIJ);CHKERRQ(ierr); 2943 ierr = MatMPISBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr); 2944 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2945 } 2946 2947 /* numeric phase */ 2948 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 2949 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 2950 2951 ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 2952 for (i=0; i<m; i++) { 2953 ierr = MatGetRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2954 Ii = i + rstart; 2955 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2956 ierr = MatRestoreRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2957 } 2958 ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr); 2959 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2960 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2961 PetscFunctionReturn(0); 2962 } 2963
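/*
   Illustrative usage sketch (not part of the library; the names, sizes and values below are made up,
   and error checking is omitted): each rank assembles one 2x2 diagonal block and, except for the
   last rank, one 2x2 block to its right, so only the upper triangular block structure is set.

     Mat         A;
     PetscMPIInt rank,size;
     PetscInt    row,col;
     PetscScalar diagv[4] = {4.0,1.0,1.0,4.0};
     PetscScalar offv[4]  = {-1.0,0.0,0.0,-1.0};

     MPI_Comm_rank(PETSC_COMM_WORLD,&rank);
     MPI_Comm_size(PETSC_COMM_WORLD,&size);
     MatCreateSBAIJ(PETSC_COMM_WORLD,2,2,2,PETSC_DETERMINE,PETSC_DETERMINE,1,NULL,1,NULL,&A);
     row = rank;
     MatSetValuesBlocked(A,1,&row,1,&row,diagv,INSERT_VALUES);
     if (rank+1 < size) {
       col = rank + 1;
       MatSetValuesBlocked(A,1,&row,1,&col,offv,INSERT_VALUES);
     }
     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
     MatDestroy(&A);
*/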