1 #define PETSCMAT_DLL 2 3 #include "../src/mat/impls/baij/mpi/mpibaij.h" /*I "petscmat.h" I*/ 4 #include "mpisbaij.h" 5 #include "../src/mat/impls/sbaij/seq/sbaij.h" 6 7 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat); 8 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat); 9 EXTERN PetscErrorCode DisAssemble_MPISBAIJ(Mat); 10 EXTERN PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt); 11 EXTERN PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []); 12 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []); 13 EXTERN PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode); 14 EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 15 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 16 EXTERN PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**); 17 EXTERN PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**); 18 EXTERN PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*); 19 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *); 20 EXTERN PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat,Vec,PetscInt[]); 21 EXTERN PetscErrorCode MatSOR_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec); 22 23 EXTERN_C_BEGIN 24 #undef __FUNCT__ 25 #define __FUNCT__ "MatStoreValues_MPISBAIJ" 26 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPISBAIJ(Mat mat) 27 { 28 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 29 PetscErrorCode ierr; 30 31 PetscFunctionBegin; 32 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 33 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 34 PetscFunctionReturn(0); 35 } 36 EXTERN_C_END 37 38 EXTERN_C_BEGIN 39 #undef __FUNCT__ 40 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ" 41 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPISBAIJ(Mat mat) 42 { 43 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 44 PetscErrorCode ierr; 45 46 PetscFunctionBegin; 47 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 48 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 49 PetscFunctionReturn(0); 50 } 51 EXTERN_C_END 52 53 54 #define CHUNKSIZE 10 55 56 #define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \ 57 { \ 58 \ 59 brow = row/bs; \ 60 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 61 rmax = aimax[brow]; nrow = ailen[brow]; \ 62 bcol = col/bs; \ 63 ridx = row % bs; cidx = col % bs; \ 64 low = 0; high = nrow; \ 65 while (high-low > 3) { \ 66 t = (low+high)/2; \ 67 if (rp[t] > bcol) high = t; \ 68 else low = t; \ 69 } \ 70 for (_i=low; _i<high; _i++) { \ 71 if (rp[_i] > bcol) break; \ 72 if (rp[_i] == bcol) { \ 73 bap = ap + bs2*_i + bs*cidx + ridx; \ 74 if (addv == ADD_VALUES) *bap += value; \ 75 else *bap = value; \ 76 goto a_noinsert; \ 77 } \ 78 } \ 79 if (a->nonew == 1) goto a_noinsert; \ 80 if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 81 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 82 N = nrow++ - 1; \ 83 /* shift up all the later entries in this row */ \ 84 for (ii=N; ii>=_i; ii--) { \ 85 rp[ii+1] = rp[ii]; \ 86 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 87 } \ 88 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 89 rp[_i] = bcol; \ 90 ap[bs2*_i + bs*cidx + ridx] = value; \ 91 a_noinsert:; \ 92 ailen[brow] = nrow; \ 93 } 94 95 #define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \ 96 { \ 97 brow = row/bs; \ 98 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 99 rmax = bimax[brow]; nrow = bilen[brow]; \ 100 bcol = col/bs; \ 101 ridx = row % bs; cidx = col % bs; \ 102 low = 0; high = nrow; \ 103 while (high-low > 3) { \ 104 t = (low+high)/2; \ 105 if (rp[t] > bcol) high = t; \ 106 else low = t; \ 107 } \ 108 for (_i=low; _i<high; _i++) { \ 109 if (rp[_i] > bcol) break; \ 110 if (rp[_i] == bcol) { \ 111 bap = ap + bs2*_i + bs*cidx + ridx; \ 112 if (addv == ADD_VALUES) *bap += value; \ 113 else *bap = value; \ 114 goto b_noinsert; \ 115 } \ 116 } \ 117 if (b->nonew == 1) goto b_noinsert; \ 118 if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 119 MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 120 N = nrow++ - 1; \ 121 /* shift up all the later entries in this row */ \ 122 for (ii=N; ii>=_i; ii--) { \ 123 rp[ii+1] = rp[ii]; \ 124 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 125 } \ 126 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 127 rp[_i] = bcol; \ 128 ap[bs2*_i + bs*cidx + ridx] = value; \ 129 b_noinsert:; \ 130 bilen[brow] = nrow; \ 131 } 132 133 /* Only add/insert a(i,j) with i<=j (blocks). 134 Any a(i,j) with i>j input by user is ingored. 135 */ 136 #undef __FUNCT__ 137 #define __FUNCT__ "MatSetValues_MPISBAIJ" 138 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 139 { 140 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 141 MatScalar value; 142 PetscTruth roworiented = baij->roworiented; 143 PetscErrorCode ierr; 144 PetscInt i,j,row,col; 145 PetscInt rstart_orig=mat->rmap->rstart; 146 PetscInt rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart; 147 PetscInt cend_orig=mat->cmap->rend,bs=mat->rmap->bs; 148 149 /* Some Variables required in the macro */ 150 Mat A = baij->A; 151 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data; 152 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 153 MatScalar *aa=a->a; 154 155 Mat B = baij->B; 156 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 157 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 158 MatScalar *ba=b->a; 159 160 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 161 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 162 MatScalar *ap,*bap; 163 164 /* for stash */ 165 PetscInt n_loc, *in_loc = PETSC_NULL; 166 MatScalar *v_loc = PETSC_NULL; 167 168 PetscFunctionBegin; 169 170 if (!baij->donotstash){ 171 if (n > baij->n_loc) { 172 ierr = PetscFree(baij->in_loc);CHKERRQ(ierr); 173 ierr = PetscFree(baij->v_loc);CHKERRQ(ierr); 174 ierr = PetscMalloc(n*sizeof(PetscInt),&baij->in_loc);CHKERRQ(ierr); 175 ierr = PetscMalloc(n*sizeof(MatScalar),&baij->v_loc);CHKERRQ(ierr); 176 baij->n_loc = n; 177 } 178 in_loc = baij->in_loc; 179 v_loc = baij->v_loc; 180 } 181 182 for (i=0; i<m; i++) { 183 if (im[i] < 0) continue; 184 #if defined(PETSC_USE_DEBUG) 185 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 186 #endif 187 if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */ 188 row = im[i] - rstart_orig; /* local row index */ 189 for (j=0; j<n; j++) { 190 if (im[i]/bs > in[j]/bs){ 191 if (a->ignore_ltriangular){ 192 continue; /* ignore lower triangular blocks */ 193 } else { 194 SETERRQ(PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 195 } 196 } 197 if (in[j] >= cstart_orig && in[j] < cend_orig){ /* diag entry (A) */ 198 col = in[j] - cstart_orig; /* local col index */ 199 brow = row/bs; bcol = col/bs; 200 if (brow > bcol) continue; /* ignore lower triangular blocks of A */ 201 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 202 MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv); 203 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 204 } else if (in[j] < 0) continue; 205 #if defined(PETSC_USE_DEBUG) 206 else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);} 207 #endif 208 else { /* off-diag entry (B) */ 209 if (mat->was_assembled) { 210 if (!baij->colmap) { 211 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 212 } 213 #if defined (PETSC_USE_CTABLE) 214 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 215 col = col - 1; 216 #else 217 col = baij->colmap[in[j]/bs] - 1; 218 #endif 219 if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) { 220 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 221 col = in[j]; 222 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 223 B = baij->B; 224 b = (Mat_SeqBAIJ*)(B)->data; 225 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 226 ba=b->a; 227 } else col += in[j]%bs; 228 } else col = in[j]; 229 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 230 MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv); 231 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 232 } 233 } 234 } else { /* off processor entry */ 235 if (!baij->donotstash) { 236 n_loc = 0; 237 for (j=0; j<n; j++){ 238 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 239 in_loc[n_loc] = in[j]; 240 if (roworiented) { 241 v_loc[n_loc] = v[i*n+j]; 242 } else { 243 v_loc[n_loc] = v[j*m+i]; 244 } 245 n_loc++; 246 } 247 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr); 248 } 249 } 250 } 251 PetscFunctionReturn(0); 252 } 253 254 #undef __FUNCT__ 255 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ" 256 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 257 { 258 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 259 const MatScalar *value; 260 MatScalar *barray=baij->barray; 261 PetscTruth roworiented = baij->roworiented; 262 PetscErrorCode ierr; 263 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 264 PetscInt rend=baij->rendbs,cstart=baij->rstartbs,stepval; 265 PetscInt cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2; 266 267 PetscFunctionBegin; 268 if(!barray) { 269 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 270 baij->barray = barray; 271 } 272 273 if (roworiented) { 274 stepval = (n-1)*bs; 275 } else { 276 stepval = (m-1)*bs; 277 } 278 for (i=0; i<m; i++) { 279 if (im[i] < 0) continue; 280 #if defined(PETSC_USE_DEBUG) 281 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 282 #endif 283 if (im[i] >= rstart && im[i] < rend) { 284 row = im[i] - rstart; 285 for (j=0; j<n; j++) { 286 /* If NumCol = 1 then a copy is not required */ 287 if ((roworiented) && (n == 1)) { 288 barray = (MatScalar*) v + i*bs2; 289 } else if((!roworiented) && (m == 1)) { 290 barray = (MatScalar*) v + j*bs2; 291 } else { /* Here a copy is required */ 292 if (roworiented) { 293 value = v + i*(stepval+bs)*bs + j*bs; 294 } else { 295 value = v + j*(stepval+bs)*bs + i*bs; 296 } 297 for (ii=0; ii<bs; ii++,value+=stepval) { 298 for (jj=0; jj<bs; jj++) { 299 *barray++ = *value++; 300 } 301 } 302 barray -=bs2; 303 } 304 305 if (in[j] >= cstart && in[j] < cend){ 306 col = in[j] - cstart; 307 ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 308 } 309 else if (in[j] < 0) continue; 310 #if defined(PETSC_USE_DEBUG) 311 else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);} 312 #endif 313 else { 314 if (mat->was_assembled) { 315 if (!baij->colmap) { 316 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 317 } 318 319 #if defined(PETSC_USE_DEBUG) 320 #if defined (PETSC_USE_CTABLE) 321 { PetscInt data; 322 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 323 if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 324 } 325 #else 326 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 327 #endif 328 #endif 329 #if defined (PETSC_USE_CTABLE) 330 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 331 col = (col - 1)/bs; 332 #else 333 col = (baij->colmap[in[j]] - 1)/bs; 334 #endif 335 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 336 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 337 col = in[j]; 338 } 339 } 340 else col = in[j]; 341 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 342 } 343 } 344 } else { 345 if (!baij->donotstash) { 346 if (roworiented) { 347 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 348 } else { 349 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 350 } 351 } 352 } 353 } 354 PetscFunctionReturn(0); 355 } 356 357 #undef __FUNCT__ 358 #define __FUNCT__ "MatGetValues_MPISBAIJ" 359 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 360 { 361 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 362 PetscErrorCode ierr; 363 PetscInt bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend; 364 PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data; 365 366 PetscFunctionBegin; 367 for (i=0; i<m; i++) { 368 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */ 369 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 370 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 371 row = idxm[i] - bsrstart; 372 for (j=0; j<n; j++) { 373 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */ 374 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 375 if (idxn[j] >= bscstart && idxn[j] < bscend){ 376 col = idxn[j] - bscstart; 377 ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 378 } else { 379 if (!baij->colmap) { 380 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 381 } 382 #if defined (PETSC_USE_CTABLE) 383 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 384 data --; 385 #else 386 data = baij->colmap[idxn[j]/bs]-1; 387 #endif 388 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 389 else { 390 col = data + idxn[j]%bs; 391 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 392 } 393 } 394 } 395 } else { 396 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 397 } 398 } 399 PetscFunctionReturn(0); 400 } 401 402 #undef __FUNCT__ 403 #define __FUNCT__ "MatNorm_MPISBAIJ" 404 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm) 405 { 406 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 407 PetscErrorCode ierr; 408 PetscReal sum[2],*lnorm2; 409 410 PetscFunctionBegin; 411 if (baij->size == 1) { 412 ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr); 413 } else { 414 if (type == NORM_FROBENIUS) { 415 ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr); 416 ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr); 417 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++; /* squar power of norm(A) */ 418 ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr); 419 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--; /* squar power of norm(B) */ 420 ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 421 *norm = sqrt(sum[0] + 2*sum[1]); 422 ierr = PetscFree(lnorm2);CHKERRQ(ierr); 423 } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */ 424 Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data; 425 Mat_SeqBAIJ *bmat=(Mat_SeqBAIJ*)baij->B->data; 426 PetscReal *rsum,*rsum2,vabs; 427 PetscInt *jj,*garray=baij->garray,rstart=baij->rstartbs,nz; 428 PetscInt brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs; 429 MatScalar *v; 430 431 ierr = PetscMalloc((2*mat->cmap->N+1)*sizeof(PetscReal),&rsum);CHKERRQ(ierr); 432 rsum2 = rsum + mat->cmap->N; 433 ierr = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 434 /* Amat */ 435 v = amat->a; jj = amat->j; 436 for (brow=0; brow<mbs; brow++) { 437 grow = bs*(rstart + brow); 438 nz = amat->i[brow+1] - amat->i[brow]; 439 for (bcol=0; bcol<nz; bcol++){ 440 gcol = bs*(rstart + *jj); jj++; 441 for (col=0; col<bs; col++){ 442 for (row=0; row<bs; row++){ 443 vabs = PetscAbsScalar(*v); v++; 444 rsum[gcol+col] += vabs; 445 /* non-diagonal block */ 446 if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs; 447 } 448 } 449 } 450 } 451 /* Bmat */ 452 v = bmat->a; jj = bmat->j; 453 for (brow=0; brow<mbs; brow++) { 454 grow = bs*(rstart + brow); 455 nz = bmat->i[brow+1] - bmat->i[brow]; 456 for (bcol=0; bcol<nz; bcol++){ 457 gcol = bs*garray[*jj]; jj++; 458 for (col=0; col<bs; col++){ 459 for (row=0; row<bs; row++){ 460 vabs = PetscAbsScalar(*v); v++; 461 rsum[gcol+col] += vabs; 462 rsum[grow+row] += vabs; 463 } 464 } 465 } 466 } 467 ierr = MPI_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 468 *norm = 0.0; 469 for (col=0; col<mat->cmap->N; col++) { 470 if (rsum2[col] > *norm) *norm = rsum2[col]; 471 } 472 ierr = PetscFree(rsum);CHKERRQ(ierr); 473 } else { 474 SETERRQ(PETSC_ERR_SUP,"No support for this norm yet"); 475 } 476 } 477 PetscFunctionReturn(0); 478 } 479 480 #undef __FUNCT__ 481 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ" 482 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode) 483 { 484 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 485 PetscErrorCode ierr; 486 PetscInt nstash,reallocs; 487 InsertMode addv; 488 489 PetscFunctionBegin; 490 if (baij->donotstash) { 491 PetscFunctionReturn(0); 492 } 493 494 /* make sure all processors are either in INSERTMODE or ADDMODE */ 495 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr); 496 if (addv == (ADD_VALUES|INSERT_VALUES)) { 497 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 498 } 499 mat->insertmode = addv; /* in case this processor had no cache */ 500 501 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 502 ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr); 503 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 504 ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 505 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 506 ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 507 PetscFunctionReturn(0); 508 } 509 510 #undef __FUNCT__ 511 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ" 512 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode) 513 { 514 Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data; 515 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)baij->A->data; 516 PetscErrorCode ierr; 517 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 518 PetscInt *row,*col; 519 PetscTruth other_disassembled; 520 PetscMPIInt n; 521 PetscTruth r1,r2,r3; 522 MatScalar *val; 523 InsertMode addv = mat->insertmode; 524 525 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 526 PetscFunctionBegin; 527 528 if (!baij->donotstash) { 529 while (1) { 530 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 531 if (!flg) break; 532 533 for (i=0; i<n;) { 534 /* Now identify the consecutive vals belonging to the same row */ 535 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 536 if (j < n) ncols = j-i; 537 else ncols = n-i; 538 /* Now assemble all these values with a single function call */ 539 ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 540 i = j; 541 } 542 } 543 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 544 /* Now process the block-stash. Since the values are stashed column-oriented, 545 set the roworiented flag to column oriented, and after MatSetValues() 546 restore the original flags */ 547 r1 = baij->roworiented; 548 r2 = a->roworiented; 549 r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented; 550 baij->roworiented = PETSC_FALSE; 551 a->roworiented = PETSC_FALSE; 552 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworinted */ 553 while (1) { 554 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 555 if (!flg) break; 556 557 for (i=0; i<n;) { 558 /* Now identify the consecutive vals belonging to the same row */ 559 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 560 if (j < n) ncols = j-i; 561 else ncols = n-i; 562 ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 563 i = j; 564 } 565 } 566 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 567 baij->roworiented = r1; 568 a->roworiented = r2; 569 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworinted */ 570 } 571 572 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 573 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 574 575 /* determine if any processor has disassembled, if so we must 576 also disassemble ourselfs, in order that we may reassemble. */ 577 /* 578 if nonzero structure of submatrix B cannot change then we know that 579 no processor disassembled thus we can skip this stuff 580 */ 581 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 582 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr); 583 if (mat->was_assembled && !other_disassembled) { 584 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 585 } 586 } 587 588 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 589 ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */ 590 } 591 ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */ 592 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 593 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 594 595 ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr); 596 baij->rowvalues = 0; 597 598 PetscFunctionReturn(0); 599 } 600 601 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 602 #undef __FUNCT__ 603 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket" 604 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 605 { 606 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 607 PetscErrorCode ierr; 608 PetscInt bs = mat->rmap->bs; 609 PetscMPIInt size = baij->size,rank = baij->rank; 610 PetscTruth iascii,isdraw; 611 PetscViewer sviewer; 612 PetscViewerFormat format; 613 614 PetscFunctionBegin; 615 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 616 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 617 if (iascii) { 618 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 619 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 620 MatInfo info; 621 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 622 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 623 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n", 624 rank,mat->rmap->N,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs, 625 mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr); 626 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 627 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr); 628 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 629 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr); 630 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 631 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 632 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 633 PetscFunctionReturn(0); 634 } else if (format == PETSC_VIEWER_ASCII_INFO) { 635 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 636 PetscFunctionReturn(0); 637 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 638 PetscFunctionReturn(0); 639 } 640 } 641 642 if (isdraw) { 643 PetscDraw draw; 644 PetscTruth isnull; 645 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 646 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 647 } 648 649 if (size == 1) { 650 ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 651 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 652 } else { 653 /* assemble the entire matrix onto first processor. */ 654 Mat A; 655 Mat_SeqSBAIJ *Aloc; 656 Mat_SeqBAIJ *Bloc; 657 PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 658 MatScalar *a; 659 660 /* Should this be the same type as mat? */ 661 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 662 if (!rank) { 663 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 664 } else { 665 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 666 } 667 ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 668 ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 669 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 670 671 /* copy over the A part */ 672 Aloc = (Mat_SeqSBAIJ*)baij->A->data; 673 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 674 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 675 676 for (i=0; i<mbs; i++) { 677 rvals[0] = bs*(baij->rstartbs + i); 678 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 679 for (j=ai[i]; j<ai[i+1]; j++) { 680 col = (baij->cstartbs+aj[j])*bs; 681 for (k=0; k<bs; k++) { 682 ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 683 col++; a += bs; 684 } 685 } 686 } 687 /* copy over the B part */ 688 Bloc = (Mat_SeqBAIJ*)baij->B->data; 689 ai = Bloc->i; aj = Bloc->j; a = Bloc->a; 690 for (i=0; i<mbs; i++) { 691 692 rvals[0] = bs*(baij->rstartbs + i); 693 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 694 for (j=ai[i]; j<ai[i+1]; j++) { 695 col = baij->garray[aj[j]]*bs; 696 for (k=0; k<bs; k++) { 697 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 698 col++; a += bs; 699 } 700 } 701 } 702 ierr = PetscFree(rvals);CHKERRQ(ierr); 703 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 704 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 705 /* 706 Everyone has to call to draw the matrix since the graphics waits are 707 synchronized across all processors that share the PetscDraw object 708 */ 709 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 710 if (!rank) { 711 ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 712 ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 713 } 714 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 715 ierr = MatDestroy(A);CHKERRQ(ierr); 716 } 717 PetscFunctionReturn(0); 718 } 719 720 #undef __FUNCT__ 721 #define __FUNCT__ "MatView_MPISBAIJ" 722 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer) 723 { 724 PetscErrorCode ierr; 725 PetscTruth iascii,isdraw,issocket,isbinary; 726 727 PetscFunctionBegin; 728 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 729 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 730 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 731 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 732 if (iascii || isdraw || issocket || isbinary) { 733 ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 734 } else { 735 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name); 736 } 737 PetscFunctionReturn(0); 738 } 739 740 #undef __FUNCT__ 741 #define __FUNCT__ "MatDestroy_MPISBAIJ" 742 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat) 743 { 744 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 745 PetscErrorCode ierr; 746 747 PetscFunctionBegin; 748 #if defined(PETSC_USE_LOG) 749 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N); 750 #endif 751 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 752 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 753 ierr = MatDestroy(baij->A);CHKERRQ(ierr); 754 ierr = MatDestroy(baij->B);CHKERRQ(ierr); 755 #if defined (PETSC_USE_CTABLE) 756 if (baij->colmap) {ierr = PetscTableDestroy(baij->colmap);CHKERRQ(ierr);} 757 #else 758 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 759 #endif 760 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 761 if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 762 if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 763 if (baij->slvec0) { 764 ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr); 765 ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr); 766 } 767 if (baij->slvec1) { 768 ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr); 769 ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr); 770 ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr); 771 } 772 if (baij->sMvctx) {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);} 773 ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr); 774 ierr = PetscFree(baij->barray);CHKERRQ(ierr); 775 ierr = PetscFree(baij->hd);CHKERRQ(ierr); 776 if (baij->diag) {ierr = VecDestroy(baij->diag);CHKERRQ(ierr);} 777 if (baij->bb1) {ierr = VecDestroy(baij->bb1);CHKERRQ(ierr);} 778 if (baij->xx1) {ierr = VecDestroy(baij->xx1);CHKERRQ(ierr);} 779 #if defined(PETSC_USE_MAT_SINGLE) 780 ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr); 781 #endif 782 ierr = PetscFree(baij->in_loc);CHKERRQ(ierr); 783 ierr = PetscFree(baij->v_loc);CHKERRQ(ierr); 784 ierr = PetscFree(baij->rangebs);CHKERRQ(ierr); 785 ierr = PetscFree(baij);CHKERRQ(ierr); 786 787 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 788 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 789 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 790 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 791 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 792 PetscFunctionReturn(0); 793 } 794 795 #undef __FUNCT__ 796 #define __FUNCT__ "MatMult_MPISBAIJ" 797 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy) 798 { 799 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 800 PetscErrorCode ierr; 801 PetscInt nt,mbs=a->mbs,bs=A->rmap->bs; 802 PetscScalar *x,*from; 803 804 PetscFunctionBegin; 805 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 806 if (nt != A->cmap->n) { 807 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 808 } 809 810 /* diagonal part */ 811 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 812 ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr); 813 814 /* subdiagonal part */ 815 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 816 817 /* copy x into the vec slvec0 */ 818 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 819 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 820 821 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 822 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 823 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 824 825 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 826 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 827 /* supperdiagonal part */ 828 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 829 PetscFunctionReturn(0); 830 } 831 832 #undef __FUNCT__ 833 #define __FUNCT__ "MatMult_MPISBAIJ_2comm" 834 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy) 835 { 836 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 837 PetscErrorCode ierr; 838 PetscInt nt; 839 840 PetscFunctionBegin; 841 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 842 if (nt != A->cmap->n) { 843 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 844 } 845 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 846 if (nt != A->rmap->N) { 847 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 848 } 849 850 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 851 /* do diagonal part */ 852 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 853 /* do supperdiagonal part */ 854 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 855 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 856 /* do subdiagonal part */ 857 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 858 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 859 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 860 861 PetscFunctionReturn(0); 862 } 863 864 #undef __FUNCT__ 865 #define __FUNCT__ "MatMultAdd_MPISBAIJ" 866 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 867 { 868 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 869 PetscErrorCode ierr; 870 PetscInt mbs=a->mbs,bs=A->rmap->bs; 871 PetscScalar *x,*from,zero=0.0; 872 873 PetscFunctionBegin; 874 /* 875 PetscSynchronizedPrintf(((PetscObject)A)->comm," MatMultAdd is called ...\n"); 876 PetscSynchronizedFlush(((PetscObject)A)->comm); 877 */ 878 /* diagonal part */ 879 ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr); 880 ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr); 881 882 /* subdiagonal part */ 883 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 884 885 /* copy x into the vec slvec0 */ 886 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 887 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 888 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 889 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 890 891 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 892 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 893 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 894 895 /* supperdiagonal part */ 896 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr); 897 898 PetscFunctionReturn(0); 899 } 900 901 #undef __FUNCT__ 902 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm" 903 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz) 904 { 905 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 906 PetscErrorCode ierr; 907 908 PetscFunctionBegin; 909 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 910 /* do diagonal part */ 911 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 912 /* do supperdiagonal part */ 913 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 914 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 915 916 /* do subdiagonal part */ 917 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 918 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 919 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 920 921 PetscFunctionReturn(0); 922 } 923 924 /* 925 This only works correctly for square matrices where the subblock A->A is the 926 diagonal block 927 */ 928 #undef __FUNCT__ 929 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ" 930 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v) 931 { 932 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 933 PetscErrorCode ierr; 934 935 PetscFunctionBegin; 936 /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */ 937 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 938 PetscFunctionReturn(0); 939 } 940 941 #undef __FUNCT__ 942 #define __FUNCT__ "MatScale_MPISBAIJ" 943 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa) 944 { 945 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 946 PetscErrorCode ierr; 947 948 PetscFunctionBegin; 949 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 950 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 951 PetscFunctionReturn(0); 952 } 953 954 #undef __FUNCT__ 955 #define __FUNCT__ "MatGetRow_MPISBAIJ" 956 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 957 { 958 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 959 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 960 PetscErrorCode ierr; 961 PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 962 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend; 963 PetscInt *cmap,*idx_p,cstart = mat->rstartbs; 964 965 PetscFunctionBegin; 966 if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 967 mat->getrowactive = PETSC_TRUE; 968 969 if (!mat->rowvalues && (idx || v)) { 970 /* 971 allocate enough space to hold information from the longest row. 972 */ 973 Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data; 974 Mat_SeqBAIJ *Ba = (Mat_SeqBAIJ*)mat->B->data; 975 PetscInt max = 1,mbs = mat->mbs,tmp; 976 for (i=0; i<mbs; i++) { 977 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */ 978 if (max < tmp) { max = tmp; } 979 } 980 ierr = PetscMalloc(max*bs2*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 981 mat->rowindices = (PetscInt*)(mat->rowvalues + max*bs2); 982 } 983 984 if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows") 985 lrow = row - brstart; /* local row index */ 986 987 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 988 if (!v) {pvA = 0; pvB = 0;} 989 if (!idx) {pcA = 0; if (!v) pcB = 0;} 990 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 991 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 992 nztot = nzA + nzB; 993 994 cmap = mat->garray; 995 if (v || idx) { 996 if (nztot) { 997 /* Sort by increasing column numbers, assuming A and B already sorted */ 998 PetscInt imark = -1; 999 if (v) { 1000 *v = v_p = mat->rowvalues; 1001 for (i=0; i<nzB; i++) { 1002 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1003 else break; 1004 } 1005 imark = i; 1006 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1007 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1008 } 1009 if (idx) { 1010 *idx = idx_p = mat->rowindices; 1011 if (imark > -1) { 1012 for (i=0; i<imark; i++) { 1013 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1014 } 1015 } else { 1016 for (i=0; i<nzB; i++) { 1017 if (cmap[cworkB[i]/bs] < cstart) 1018 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1019 else break; 1020 } 1021 imark = i; 1022 } 1023 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1024 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1025 } 1026 } else { 1027 if (idx) *idx = 0; 1028 if (v) *v = 0; 1029 } 1030 } 1031 *nz = nztot; 1032 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1033 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1034 PetscFunctionReturn(0); 1035 } 1036 1037 #undef __FUNCT__ 1038 #define __FUNCT__ "MatRestoreRow_MPISBAIJ" 1039 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1040 { 1041 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1042 1043 PetscFunctionBegin; 1044 if (!baij->getrowactive) { 1045 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1046 } 1047 baij->getrowactive = PETSC_FALSE; 1048 PetscFunctionReturn(0); 1049 } 1050 1051 #undef __FUNCT__ 1052 #define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ" 1053 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A) 1054 { 1055 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1056 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1057 1058 PetscFunctionBegin; 1059 aA->getrow_utriangular = PETSC_TRUE; 1060 PetscFunctionReturn(0); 1061 } 1062 #undef __FUNCT__ 1063 #define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ" 1064 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A) 1065 { 1066 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1067 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1068 1069 PetscFunctionBegin; 1070 aA->getrow_utriangular = PETSC_FALSE; 1071 PetscFunctionReturn(0); 1072 } 1073 1074 #undef __FUNCT__ 1075 #define __FUNCT__ "MatRealPart_MPISBAIJ" 1076 PetscErrorCode MatRealPart_MPISBAIJ(Mat A) 1077 { 1078 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1079 PetscErrorCode ierr; 1080 1081 PetscFunctionBegin; 1082 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1083 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1084 PetscFunctionReturn(0); 1085 } 1086 1087 #undef __FUNCT__ 1088 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ" 1089 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A) 1090 { 1091 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1092 PetscErrorCode ierr; 1093 1094 PetscFunctionBegin; 1095 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1096 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1097 PetscFunctionReturn(0); 1098 } 1099 1100 #undef __FUNCT__ 1101 #define __FUNCT__ "MatZeroEntries_MPISBAIJ" 1102 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A) 1103 { 1104 Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data; 1105 PetscErrorCode ierr; 1106 1107 PetscFunctionBegin; 1108 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1109 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1110 PetscFunctionReturn(0); 1111 } 1112 1113 #undef __FUNCT__ 1114 #define __FUNCT__ "MatGetInfo_MPISBAIJ" 1115 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1116 { 1117 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data; 1118 Mat A = a->A,B = a->B; 1119 PetscErrorCode ierr; 1120 PetscReal isend[5],irecv[5]; 1121 1122 PetscFunctionBegin; 1123 info->block_size = (PetscReal)matin->rmap->bs; 1124 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1125 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1126 isend[3] = info->memory; isend[4] = info->mallocs; 1127 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1128 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1129 isend[3] += info->memory; isend[4] += info->mallocs; 1130 if (flag == MAT_LOCAL) { 1131 info->nz_used = isend[0]; 1132 info->nz_allocated = isend[1]; 1133 info->nz_unneeded = isend[2]; 1134 info->memory = isend[3]; 1135 info->mallocs = isend[4]; 1136 } else if (flag == MAT_GLOBAL_MAX) { 1137 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1138 info->nz_used = irecv[0]; 1139 info->nz_allocated = irecv[1]; 1140 info->nz_unneeded = irecv[2]; 1141 info->memory = irecv[3]; 1142 info->mallocs = irecv[4]; 1143 } else if (flag == MAT_GLOBAL_SUM) { 1144 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1145 info->nz_used = irecv[0]; 1146 info->nz_allocated = irecv[1]; 1147 info->nz_unneeded = irecv[2]; 1148 info->memory = irecv[3]; 1149 info->mallocs = irecv[4]; 1150 } else { 1151 SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1152 } 1153 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1154 info->fill_ratio_needed = 0; 1155 info->factor_mallocs = 0; 1156 PetscFunctionReturn(0); 1157 } 1158 1159 #undef __FUNCT__ 1160 #define __FUNCT__ "MatSetOption_MPISBAIJ" 1161 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscTruth flg) 1162 { 1163 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1164 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1165 PetscErrorCode ierr; 1166 1167 PetscFunctionBegin; 1168 switch (op) { 1169 case MAT_NEW_NONZERO_LOCATIONS: 1170 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1171 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1172 case MAT_KEEP_NONZERO_PATTERN: 1173 case MAT_NEW_NONZERO_LOCATION_ERR: 1174 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1175 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1176 break; 1177 case MAT_ROW_ORIENTED: 1178 a->roworiented = flg; 1179 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1180 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1181 break; 1182 case MAT_NEW_DIAGONALS: 1183 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1184 break; 1185 case MAT_IGNORE_OFF_PROC_ENTRIES: 1186 a->donotstash = flg; 1187 break; 1188 case MAT_USE_HASH_TABLE: 1189 a->ht_flag = flg; 1190 break; 1191 case MAT_HERMITIAN: 1192 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1193 break; 1194 case MAT_SYMMETRIC: 1195 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1196 break; 1197 case MAT_STRUCTURALLY_SYMMETRIC: 1198 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1199 break; 1200 case MAT_SYMMETRY_ETERNAL: 1201 if (!flg) SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric"); 1202 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1203 break; 1204 case MAT_IGNORE_LOWER_TRIANGULAR: 1205 aA->ignore_ltriangular = flg; 1206 break; 1207 case MAT_ERROR_LOWER_TRIANGULAR: 1208 aA->ignore_ltriangular = flg; 1209 break; 1210 case MAT_GETROW_UPPERTRIANGULAR: 1211 aA->getrow_utriangular = flg; 1212 break; 1213 default: 1214 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 1215 } 1216 PetscFunctionReturn(0); 1217 } 1218 1219 #undef __FUNCT__ 1220 #define __FUNCT__ "MatTranspose_MPISBAIJ" 1221 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B) 1222 { 1223 PetscErrorCode ierr; 1224 PetscFunctionBegin; 1225 if (MAT_INITIAL_MATRIX || *B != A) { 1226 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 1227 } 1228 PetscFunctionReturn(0); 1229 } 1230 1231 #undef __FUNCT__ 1232 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ" 1233 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr) 1234 { 1235 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1236 Mat a=baij->A, b=baij->B; 1237 PetscErrorCode ierr; 1238 PetscInt nv,m,n; 1239 PetscTruth flg; 1240 1241 PetscFunctionBegin; 1242 if (ll != rr){ 1243 ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr); 1244 if (!flg) 1245 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n"); 1246 } 1247 if (!ll) PetscFunctionReturn(0); 1248 1249 ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr); 1250 if (m != n) SETERRQ2(PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n); 1251 1252 ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr); 1253 if (nv!=n) SETERRQ(PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size"); 1254 1255 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1256 1257 /* left diagonalscale the off-diagonal part */ 1258 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1259 1260 /* scale the diagonal part */ 1261 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1262 1263 /* right diagonalscale the off-diagonal part */ 1264 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1265 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1266 PetscFunctionReturn(0); 1267 } 1268 1269 #undef __FUNCT__ 1270 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ" 1271 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A) 1272 { 1273 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1274 PetscErrorCode ierr; 1275 1276 PetscFunctionBegin; 1277 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1278 PetscFunctionReturn(0); 1279 } 1280 1281 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *); 1282 1283 #undef __FUNCT__ 1284 #define __FUNCT__ "MatEqual_MPISBAIJ" 1285 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag) 1286 { 1287 Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data; 1288 Mat a,b,c,d; 1289 PetscTruth flg; 1290 PetscErrorCode ierr; 1291 1292 PetscFunctionBegin; 1293 a = matA->A; b = matA->B; 1294 c = matB->A; d = matB->B; 1295 1296 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1297 if (flg) { 1298 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1299 } 1300 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1301 PetscFunctionReturn(0); 1302 } 1303 1304 #undef __FUNCT__ 1305 #define __FUNCT__ "MatCopy_MPISBAIJ" 1306 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str) 1307 { 1308 PetscErrorCode ierr; 1309 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1310 Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data; 1311 1312 PetscFunctionBegin; 1313 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1314 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1315 ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr); 1316 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1317 ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr); 1318 } else { 1319 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1320 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1321 } 1322 PetscFunctionReturn(0); 1323 } 1324 1325 #undef __FUNCT__ 1326 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ" 1327 PetscErrorCode MatSetUpPreallocation_MPISBAIJ(Mat A) 1328 { 1329 PetscErrorCode ierr; 1330 1331 PetscFunctionBegin; 1332 ierr = MatMPISBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1333 PetscFunctionReturn(0); 1334 } 1335 1336 #include "petscblaslapack.h" 1337 #undef __FUNCT__ 1338 #define __FUNCT__ "MatAXPY_MPISBAIJ" 1339 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1340 { 1341 PetscErrorCode ierr; 1342 Mat_MPISBAIJ *xx=(Mat_MPISBAIJ *)X->data,*yy=(Mat_MPISBAIJ *)Y->data; 1343 PetscBLASInt bnz,one=1; 1344 Mat_SeqSBAIJ *xa,*ya; 1345 Mat_SeqBAIJ *xb,*yb; 1346 1347 PetscFunctionBegin; 1348 if (str == SAME_NONZERO_PATTERN) { 1349 PetscScalar alpha = a; 1350 xa = (Mat_SeqSBAIJ *)xx->A->data; 1351 ya = (Mat_SeqSBAIJ *)yy->A->data; 1352 bnz = PetscBLASIntCast(xa->nz); 1353 BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one); 1354 xb = (Mat_SeqBAIJ *)xx->B->data; 1355 yb = (Mat_SeqBAIJ *)yy->B->data; 1356 bnz = PetscBLASIntCast(xb->nz); 1357 BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one); 1358 } else { 1359 ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr); 1360 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1361 ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr); 1362 } 1363 PetscFunctionReturn(0); 1364 } 1365 1366 #undef __FUNCT__ 1367 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ" 1368 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[]) 1369 { 1370 PetscErrorCode ierr; 1371 PetscInt i; 1372 PetscTruth flg; 1373 1374 PetscFunctionBegin; 1375 for (i=0; i<n; i++) { 1376 ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr); 1377 if (!flg) { 1378 SETERRQ(PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices"); 1379 } 1380 } 1381 ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr); 1382 PetscFunctionReturn(0); 1383 } 1384 1385 1386 /* -------------------------------------------------------------------*/ 1387 static struct _MatOps MatOps_Values = { 1388 MatSetValues_MPISBAIJ, 1389 MatGetRow_MPISBAIJ, 1390 MatRestoreRow_MPISBAIJ, 1391 MatMult_MPISBAIJ, 1392 /* 4*/ MatMultAdd_MPISBAIJ, 1393 MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */ 1394 MatMultAdd_MPISBAIJ, 1395 0, 1396 0, 1397 0, 1398 /*10*/ 0, 1399 0, 1400 0, 1401 MatSOR_MPISBAIJ, 1402 MatTranspose_MPISBAIJ, 1403 /*15*/ MatGetInfo_MPISBAIJ, 1404 MatEqual_MPISBAIJ, 1405 MatGetDiagonal_MPISBAIJ, 1406 MatDiagonalScale_MPISBAIJ, 1407 MatNorm_MPISBAIJ, 1408 /*20*/ MatAssemblyBegin_MPISBAIJ, 1409 MatAssemblyEnd_MPISBAIJ, 1410 MatSetOption_MPISBAIJ, 1411 MatZeroEntries_MPISBAIJ, 1412 /*24*/ 0, 1413 0, 1414 0, 1415 0, 1416 0, 1417 /*29*/ MatSetUpPreallocation_MPISBAIJ, 1418 0, 1419 0, 1420 0, 1421 0, 1422 /*34*/ MatDuplicate_MPISBAIJ, 1423 0, 1424 0, 1425 0, 1426 0, 1427 /*39*/ MatAXPY_MPISBAIJ, 1428 MatGetSubMatrices_MPISBAIJ, 1429 MatIncreaseOverlap_MPISBAIJ, 1430 MatGetValues_MPISBAIJ, 1431 MatCopy_MPISBAIJ, 1432 /*44*/ 0, 1433 MatScale_MPISBAIJ, 1434 0, 1435 0, 1436 0, 1437 /*49*/ 0, 1438 0, 1439 0, 1440 0, 1441 0, 1442 /*54*/ 0, 1443 0, 1444 MatSetUnfactored_MPISBAIJ, 1445 0, 1446 MatSetValuesBlocked_MPISBAIJ, 1447 /*59*/ 0, 1448 0, 1449 0, 1450 0, 1451 0, 1452 /*64*/ 0, 1453 0, 1454 0, 1455 0, 1456 0, 1457 /*69*/ MatGetRowMaxAbs_MPISBAIJ, 1458 0, 1459 0, 1460 0, 1461 0, 1462 /*74*/ 0, 1463 0, 1464 0, 1465 0, 1466 0, 1467 /*79*/ 0, 1468 0, 1469 0, 1470 0, 1471 MatLoad_MPISBAIJ, 1472 /*84*/ 0, 1473 0, 1474 0, 1475 0, 1476 0, 1477 /*89*/ 0, 1478 0, 1479 0, 1480 0, 1481 0, 1482 /*94*/ 0, 1483 0, 1484 0, 1485 0, 1486 0, 1487 /*99*/ 0, 1488 0, 1489 0, 1490 0, 1491 0, 1492 /*104*/0, 1493 MatRealPart_MPISBAIJ, 1494 MatImaginaryPart_MPISBAIJ, 1495 MatGetRowUpperTriangular_MPISBAIJ, 1496 MatRestoreRowUpperTriangular_MPISBAIJ 1497 }; 1498 1499 1500 EXTERN_C_BEGIN 1501 #undef __FUNCT__ 1502 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ" 1503 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 1504 { 1505 PetscFunctionBegin; 1506 *a = ((Mat_MPISBAIJ *)A->data)->A; 1507 *iscopy = PETSC_FALSE; 1508 PetscFunctionReturn(0); 1509 } 1510 EXTERN_C_END 1511 1512 EXTERN_C_BEGIN 1513 #undef __FUNCT__ 1514 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ" 1515 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz) 1516 { 1517 Mat_MPISBAIJ *b; 1518 PetscErrorCode ierr; 1519 PetscInt i,mbs,Mbs,newbs = PetscAbs(bs); 1520 1521 PetscFunctionBegin; 1522 if (bs < 0){ 1523 ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPISBAIJ matrix","Mat");CHKERRQ(ierr); 1524 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr); 1525 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1526 bs = PetscAbs(bs); 1527 } 1528 if ((d_nnz || o_nnz) && newbs != bs) { 1529 SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz"); 1530 } 1531 bs = newbs; 1532 1533 if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3; 1534 if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1; 1535 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 1536 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 1537 1538 B->rmap->bs = B->cmap->bs = bs; 1539 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 1540 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 1541 1542 if (d_nnz) { 1543 for (i=0; i<B->rmap->n/bs; i++) { 1544 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 1545 } 1546 } 1547 if (o_nnz) { 1548 for (i=0; i<B->rmap->n/bs; i++) { 1549 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 1550 } 1551 } 1552 1553 b = (Mat_MPISBAIJ*)B->data; 1554 mbs = B->rmap->n/bs; 1555 Mbs = B->rmap->N/bs; 1556 if (mbs*bs != B->rmap->n) { 1557 SETERRQ2(PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->N,bs); 1558 } 1559 1560 B->rmap->bs = bs; 1561 b->bs2 = bs*bs; 1562 b->mbs = mbs; 1563 b->nbs = mbs; 1564 b->Mbs = Mbs; 1565 b->Nbs = Mbs; 1566 1567 for (i=0; i<=b->size; i++) { 1568 b->rangebs[i] = B->rmap->range[i]/bs; 1569 } 1570 b->rstartbs = B->rmap->rstart/bs; 1571 b->rendbs = B->rmap->rend/bs; 1572 1573 b->cstartbs = B->cmap->rstart/bs; 1574 b->cendbs = B->cmap->rend/bs; 1575 1576 if (!B->preallocated) { 1577 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 1578 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 1579 ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr); 1580 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 1581 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 1582 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 1583 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 1584 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 1585 /* build cache for off array entries formed */ 1586 ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr); 1587 } 1588 1589 ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 1590 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 1591 B->preallocated = PETSC_TRUE; 1592 PetscFunctionReturn(0); 1593 } 1594 EXTERN_C_END 1595 1596 EXTERN_C_BEGIN 1597 #if defined(PETSC_HAVE_MUMPS) 1598 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_mpisbaij_mumps(Mat,MatFactorType,Mat*); 1599 #endif 1600 #if defined(PETSC_HAVE_SPOOLES) 1601 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_mpisbaij_spooles(Mat,MatFactorType,Mat*); 1602 #endif 1603 #if defined(PETSC_HAVE_PASTIX) 1604 extern PetscErrorCode MatGetFactor_mpisbaij_pastix(Mat,MatFactorType,Mat*); 1605 #endif 1606 EXTERN_C_END 1607 1608 /*MC 1609 MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices, 1610 based on block compressed sparse row format. Only the upper triangular portion of the matrix is stored. 1611 1612 Options Database Keys: 1613 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions() 1614 1615 Level: beginner 1616 1617 .seealso: MatCreateMPISBAIJ 1618 M*/ 1619 1620 EXTERN_C_BEGIN 1621 #undef __FUNCT__ 1622 #define __FUNCT__ "MatCreate_MPISBAIJ" 1623 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPISBAIJ(Mat B) 1624 { 1625 Mat_MPISBAIJ *b; 1626 PetscErrorCode ierr; 1627 PetscTruth flg; 1628 1629 PetscFunctionBegin; 1630 1631 ierr = PetscNewLog(B,Mat_MPISBAIJ,&b);CHKERRQ(ierr); 1632 B->data = (void*)b; 1633 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1634 1635 B->ops->destroy = MatDestroy_MPISBAIJ; 1636 B->ops->view = MatView_MPISBAIJ; 1637 B->mapping = 0; 1638 B->assembled = PETSC_FALSE; 1639 1640 B->insertmode = NOT_SET_VALUES; 1641 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 1642 ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr); 1643 1644 /* build local table of row and column ownerships */ 1645 ierr = PetscMalloc((b->size+2)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr); 1646 1647 /* build cache for off array entries formed */ 1648 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 1649 b->donotstash = PETSC_FALSE; 1650 b->colmap = PETSC_NULL; 1651 b->garray = PETSC_NULL; 1652 b->roworiented = PETSC_TRUE; 1653 1654 /* stuff used in block assembly */ 1655 b->barray = 0; 1656 1657 /* stuff used for matrix vector multiply */ 1658 b->lvec = 0; 1659 b->Mvctx = 0; 1660 b->slvec0 = 0; 1661 b->slvec0b = 0; 1662 b->slvec1 = 0; 1663 b->slvec1a = 0; 1664 b->slvec1b = 0; 1665 b->sMvctx = 0; 1666 1667 /* stuff for MatGetRow() */ 1668 b->rowindices = 0; 1669 b->rowvalues = 0; 1670 b->getrowactive = PETSC_FALSE; 1671 1672 /* hash table stuff */ 1673 b->ht = 0; 1674 b->hd = 0; 1675 b->ht_size = 0; 1676 b->ht_flag = PETSC_FALSE; 1677 b->ht_fact = 0; 1678 b->ht_total_ct = 0; 1679 b->ht_insert_ct = 0; 1680 1681 b->in_loc = 0; 1682 b->v_loc = 0; 1683 b->n_loc = 0; 1684 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr); 1685 ierr = PetscOptionsTruth("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 1686 if (flg) { 1687 PetscReal fact = 1.39; 1688 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 1689 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr); 1690 if (fact <= 1.0) fact = 1.39; 1691 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 1692 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 1693 } 1694 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1695 1696 #if defined(PETSC_HAVE_PASTIX) 1697 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C", 1698 "MatGetFactor_mpisbaij_pastix", 1699 MatGetFactor_mpisbaij_pastix);CHKERRQ(ierr); 1700 #endif 1701 #if defined(PETSC_HAVE_MUMPS) 1702 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", 1703 "MatGetFactor_mpisbaij_mumps", 1704 MatGetFactor_mpisbaij_mumps);CHKERRQ(ierr); 1705 #endif 1706 #if defined(PETSC_HAVE_SPOOLES) 1707 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C", 1708 "MatGetFactor_mpisbaij_spooles", 1709 MatGetFactor_mpisbaij_spooles);CHKERRQ(ierr); 1710 #endif 1711 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1712 "MatStoreValues_MPISBAIJ", 1713 MatStoreValues_MPISBAIJ);CHKERRQ(ierr); 1714 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1715 "MatRetrieveValues_MPISBAIJ", 1716 MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr); 1717 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 1718 "MatGetDiagonalBlock_MPISBAIJ", 1719 MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr); 1720 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C", 1721 "MatMPISBAIJSetPreallocation_MPISBAIJ", 1722 MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr); 1723 B->symmetric = PETSC_TRUE; 1724 B->structurally_symmetric = PETSC_TRUE; 1725 B->symmetric_set = PETSC_TRUE; 1726 B->structurally_symmetric_set = PETSC_TRUE; 1727 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr); 1728 PetscFunctionReturn(0); 1729 } 1730 EXTERN_C_END 1731 1732 /*MC 1733 MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices. 1734 1735 This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator, 1736 and MATMPISBAIJ otherwise. 1737 1738 Options Database Keys: 1739 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions() 1740 1741 Level: beginner 1742 1743 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ 1744 M*/ 1745 1746 EXTERN_C_BEGIN 1747 #undef __FUNCT__ 1748 #define __FUNCT__ "MatCreate_SBAIJ" 1749 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SBAIJ(Mat A) 1750 { 1751 PetscErrorCode ierr; 1752 PetscMPIInt size; 1753 1754 PetscFunctionBegin; 1755 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 1756 if (size == 1) { 1757 ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr); 1758 } else { 1759 ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 1760 } 1761 PetscFunctionReturn(0); 1762 } 1763 EXTERN_C_END 1764 1765 #undef __FUNCT__ 1766 #define __FUNCT__ "MatMPISBAIJSetPreallocation" 1767 /*@C 1768 MatMPISBAIJSetPreallocation - For good matrix assembly performance 1769 the user should preallocate the matrix storage by setting the parameters 1770 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1771 performance can be increased by more than a factor of 50. 1772 1773 Collective on Mat 1774 1775 Input Parameters: 1776 + A - the matrix 1777 . bs - size of blockk 1778 . d_nz - number of block nonzeros per block row in diagonal portion of local 1779 submatrix (same for all local rows) 1780 . d_nnz - array containing the number of block nonzeros in the various block rows 1781 in the upper triangular and diagonal part of the in diagonal portion of the local 1782 (possibly different for each block row) or PETSC_NULL. You must leave room 1783 for the diagonal entry even if it is zero. 1784 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1785 submatrix (same for all local rows). 1786 - o_nnz - array containing the number of nonzeros in the various block rows of the 1787 off-diagonal portion of the local submatrix (possibly different for 1788 each block row) or PETSC_NULL. 1789 1790 1791 Options Database Keys: 1792 . -mat_no_unroll - uses code that does not unroll the loops in the 1793 block calculations (much slower) 1794 . -mat_block_size - size of the blocks to use 1795 1796 Notes: 1797 1798 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1799 than it must be used on all processors that share the object for that argument. 1800 1801 If the *_nnz parameter is given then the *_nz parameter is ignored 1802 1803 Storage Information: 1804 For a square global matrix we define each processor's diagonal portion 1805 to be its local rows and the corresponding columns (a square submatrix); 1806 each processor's off-diagonal portion encompasses the remainder of the 1807 local matrix (a rectangular submatrix). 1808 1809 The user can specify preallocated storage for the diagonal part of 1810 the local submatrix with either d_nz or d_nnz (not both). Set 1811 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1812 memory allocation. Likewise, specify preallocated storage for the 1813 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1814 1815 You can call MatGetInfo() to get information on how effective the preallocation was; 1816 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 1817 You can also run with the option -info and look for messages with the string 1818 malloc in them to see if additional memory allocation was needed. 1819 1820 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1821 the figure below we depict these three local rows and all columns (0-11). 1822 1823 .vb 1824 0 1 2 3 4 5 6 7 8 9 10 11 1825 ------------------- 1826 row 3 | o o o d d d o o o o o o 1827 row 4 | o o o d d d o o o o o o 1828 row 5 | o o o d d d o o o o o o 1829 ------------------- 1830 .ve 1831 1832 Thus, any entries in the d locations are stored in the d (diagonal) 1833 submatrix, and any entries in the o locations are stored in the 1834 o (off-diagonal) submatrix. Note that the d matrix is stored in 1835 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 1836 1837 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 1838 plus the diagonal part of the d matrix, 1839 and o_nz should indicate the number of block nonzeros per row in the o matrix. 1840 In general, for PDE problems in which most nonzeros are near the diagonal, 1841 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1842 or you will get TERRIBLE performance; see the users' manual chapter on 1843 matrices. 1844 1845 Level: intermediate 1846 1847 .keywords: matrix, block, aij, compressed row, sparse, parallel 1848 1849 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1850 @*/ 1851 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 1852 { 1853 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 1854 1855 PetscFunctionBegin; 1856 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 1857 if (f) { 1858 ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 1859 } 1860 PetscFunctionReturn(0); 1861 } 1862 1863 #undef __FUNCT__ 1864 #define __FUNCT__ "MatCreateMPISBAIJ" 1865 /*@C 1866 MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format 1867 (block compressed row). For good matrix assembly performance 1868 the user should preallocate the matrix storage by setting the parameters 1869 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1870 performance can be increased by more than a factor of 50. 1871 1872 Collective on MPI_Comm 1873 1874 Input Parameters: 1875 + comm - MPI communicator 1876 . bs - size of blockk 1877 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1878 This value should be the same as the local size used in creating the 1879 y vector for the matrix-vector product y = Ax. 1880 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1881 This value should be the same as the local size used in creating the 1882 x vector for the matrix-vector product y = Ax. 1883 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 1884 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 1885 . d_nz - number of block nonzeros per block row in diagonal portion of local 1886 submatrix (same for all local rows) 1887 . d_nnz - array containing the number of block nonzeros in the various block rows 1888 in the upper triangular portion of the in diagonal portion of the local 1889 (possibly different for each block block row) or PETSC_NULL. 1890 You must leave room for the diagonal entry even if it is zero. 1891 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1892 submatrix (same for all local rows). 1893 - o_nnz - array containing the number of nonzeros in the various block rows of the 1894 off-diagonal portion of the local submatrix (possibly different for 1895 each block row) or PETSC_NULL. 1896 1897 Output Parameter: 1898 . A - the matrix 1899 1900 Options Database Keys: 1901 . -mat_no_unroll - uses code that does not unroll the loops in the 1902 block calculations (much slower) 1903 . -mat_block_size - size of the blocks to use 1904 . -mat_mpi - use the parallel matrix data structures even on one processor 1905 (defaults to using SeqBAIJ format on one processor) 1906 1907 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 1908 MatXXXXSetPreallocation() paradgm instead of this routine directly. 1909 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 1910 1911 Notes: 1912 The number of rows and columns must be divisible by blocksize. 1913 This matrix type does not support complex Hermitian operation. 1914 1915 The user MUST specify either the local or global matrix dimensions 1916 (possibly both). 1917 1918 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1919 than it must be used on all processors that share the object for that argument. 1920 1921 If the *_nnz parameter is given then the *_nz parameter is ignored 1922 1923 Storage Information: 1924 For a square global matrix we define each processor's diagonal portion 1925 to be its local rows and the corresponding columns (a square submatrix); 1926 each processor's off-diagonal portion encompasses the remainder of the 1927 local matrix (a rectangular submatrix). 1928 1929 The user can specify preallocated storage for the diagonal part of 1930 the local submatrix with either d_nz or d_nnz (not both). Set 1931 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1932 memory allocation. Likewise, specify preallocated storage for the 1933 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1934 1935 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1936 the figure below we depict these three local rows and all columns (0-11). 1937 1938 .vb 1939 0 1 2 3 4 5 6 7 8 9 10 11 1940 ------------------- 1941 row 3 | o o o d d d o o o o o o 1942 row 4 | o o o d d d o o o o o o 1943 row 5 | o o o d d d o o o o o o 1944 ------------------- 1945 .ve 1946 1947 Thus, any entries in the d locations are stored in the d (diagonal) 1948 submatrix, and any entries in the o locations are stored in the 1949 o (off-diagonal) submatrix. Note that the d matrix is stored in 1950 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 1951 1952 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 1953 plus the diagonal part of the d matrix, 1954 and o_nz should indicate the number of block nonzeros per row in the o matrix. 1955 In general, for PDE problems in which most nonzeros are near the diagonal, 1956 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1957 or you will get TERRIBLE performance; see the users' manual chapter on 1958 matrices. 1959 1960 Level: intermediate 1961 1962 .keywords: matrix, block, aij, compressed row, sparse, parallel 1963 1964 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1965 @*/ 1966 1967 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPISBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 1968 { 1969 PetscErrorCode ierr; 1970 PetscMPIInt size; 1971 1972 PetscFunctionBegin; 1973 ierr = MatCreate(comm,A);CHKERRQ(ierr); 1974 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 1975 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1976 if (size > 1) { 1977 ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr); 1978 ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 1979 } else { 1980 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 1981 ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 1982 } 1983 PetscFunctionReturn(0); 1984 } 1985 1986 1987 #undef __FUNCT__ 1988 #define __FUNCT__ "MatDuplicate_MPISBAIJ" 1989 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1990 { 1991 Mat mat; 1992 Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data; 1993 PetscErrorCode ierr; 1994 PetscInt len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs; 1995 PetscScalar *array; 1996 1997 PetscFunctionBegin; 1998 *newmat = 0; 1999 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 2000 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2001 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2002 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2003 ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2004 ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2005 2006 mat->factor = matin->factor; 2007 mat->preallocated = PETSC_TRUE; 2008 mat->assembled = PETSC_TRUE; 2009 mat->insertmode = NOT_SET_VALUES; 2010 2011 a = (Mat_MPISBAIJ*)mat->data; 2012 a->bs2 = oldmat->bs2; 2013 a->mbs = oldmat->mbs; 2014 a->nbs = oldmat->nbs; 2015 a->Mbs = oldmat->Mbs; 2016 a->Nbs = oldmat->Nbs; 2017 2018 2019 a->size = oldmat->size; 2020 a->rank = oldmat->rank; 2021 a->donotstash = oldmat->donotstash; 2022 a->roworiented = oldmat->roworiented; 2023 a->rowindices = 0; 2024 a->rowvalues = 0; 2025 a->getrowactive = PETSC_FALSE; 2026 a->barray = 0; 2027 a->rstartbs = oldmat->rstartbs; 2028 a->rendbs = oldmat->rendbs; 2029 a->cstartbs = oldmat->cstartbs; 2030 a->cendbs = oldmat->cendbs; 2031 2032 /* hash table stuff */ 2033 a->ht = 0; 2034 a->hd = 0; 2035 a->ht_size = 0; 2036 a->ht_flag = oldmat->ht_flag; 2037 a->ht_fact = oldmat->ht_fact; 2038 a->ht_total_ct = 0; 2039 a->ht_insert_ct = 0; 2040 2041 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr); 2042 ierr = MatStashCreate_Private(((PetscObject)matin)->comm,1,&mat->stash);CHKERRQ(ierr); 2043 ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 2044 if (oldmat->colmap) { 2045 #if defined (PETSC_USE_CTABLE) 2046 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2047 #else 2048 ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2049 ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2050 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2051 #endif 2052 } else a->colmap = 0; 2053 2054 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2055 ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2056 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2057 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 2058 } else a->garray = 0; 2059 2060 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2061 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2062 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2063 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2064 2065 ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr); 2066 ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr); 2067 ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr); 2068 ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr); 2069 2070 ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr); 2071 ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr); 2072 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr); 2073 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr); 2074 ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr); 2075 ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr); 2076 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr); 2077 ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr); 2078 ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr); 2079 ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr); 2080 ierr = PetscLogObjectParent(mat,a->slvec0b);CHKERRQ(ierr); 2081 ierr = PetscLogObjectParent(mat,a->slvec1a);CHKERRQ(ierr); 2082 ierr = PetscLogObjectParent(mat,a->slvec1b);CHKERRQ(ierr); 2083 2084 /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */ 2085 ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr); 2086 a->sMvctx = oldmat->sMvctx; 2087 ierr = PetscLogObjectParent(mat,a->sMvctx);CHKERRQ(ierr); 2088 2089 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2090 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2091 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2092 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2093 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2094 *newmat = mat; 2095 PetscFunctionReturn(0); 2096 } 2097 2098 #include "petscsys.h" 2099 2100 #undef __FUNCT__ 2101 #define __FUNCT__ "MatLoad_MPISBAIJ" 2102 PetscErrorCode MatLoad_MPISBAIJ(PetscViewer viewer, const MatType type,Mat *newmat) 2103 { 2104 Mat A; 2105 PetscErrorCode ierr; 2106 PetscInt i,nz,j,rstart,rend; 2107 PetscScalar *vals,*buf; 2108 MPI_Comm comm = ((PetscObject)viewer)->comm; 2109 MPI_Status status; 2110 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,*locrowlens,mmbs; 2111 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2112 PetscInt *procsnz = 0,jj,*mycols,*ibuf; 2113 PetscInt bs=1,Mbs,mbs,extra_rows; 2114 PetscInt *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2115 PetscInt dcount,kmax,k,nzcount,tmp; 2116 int fd; 2117 2118 PetscFunctionBegin; 2119 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr); 2120 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 2121 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2122 2123 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2124 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2125 if (!rank) { 2126 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2127 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2128 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2129 if (header[3] < 0) { 2130 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ"); 2131 } 2132 } 2133 2134 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2135 M = header[1]; N = header[2]; 2136 2137 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2138 2139 /* 2140 This code adds extra rows to make sure the number of rows is 2141 divisible by the blocksize 2142 */ 2143 Mbs = M/bs; 2144 extra_rows = bs - M + bs*(Mbs); 2145 if (extra_rows == bs) extra_rows = 0; 2146 else Mbs++; 2147 if (extra_rows &&!rank) { 2148 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 2149 } 2150 2151 /* determine ownership of all rows */ 2152 mbs = Mbs/size + ((Mbs % size) > rank); 2153 m = mbs*bs; 2154 ierr = PetscMalloc(2*(size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr); 2155 browners = rowners + size + 1; 2156 mmbs = PetscMPIIntCast(mbs); 2157 ierr = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2158 rowners[0] = 0; 2159 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2160 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2161 rstart = rowners[rank]; 2162 rend = rowners[rank+1]; 2163 2164 /* distribute row lengths to all processors */ 2165 ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscMPIInt),&locrowlens);CHKERRQ(ierr); 2166 if (!rank) { 2167 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2168 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2169 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2170 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr); 2171 for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2172 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2173 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2174 } else { 2175 ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2176 } 2177 2178 if (!rank) { /* procs[0] */ 2179 /* calculate the number of nonzeros on each processor */ 2180 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2181 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2182 for (i=0; i<size; i++) { 2183 for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 2184 procsnz[i] += rowlengths[j]; 2185 } 2186 } 2187 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2188 2189 /* determine max buffer needed and allocate it */ 2190 maxnz = 0; 2191 for (i=0; i<size; i++) { 2192 maxnz = PetscMax(maxnz,procsnz[i]); 2193 } 2194 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2195 2196 /* read in my part of the matrix column indices */ 2197 nz = procsnz[0]; 2198 ierr = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 2199 mycols = ibuf; 2200 if (size == 1) nz -= extra_rows; 2201 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2202 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2203 2204 /* read in every ones (except the last) and ship off */ 2205 for (i=1; i<size-1; i++) { 2206 nz = procsnz[i]; 2207 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2208 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2209 } 2210 /* read in the stuff for the last proc */ 2211 if (size != 1) { 2212 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2213 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2214 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2215 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 2216 } 2217 ierr = PetscFree(cols);CHKERRQ(ierr); 2218 } else { /* procs[i], i>0 */ 2219 /* determine buffer space needed for message */ 2220 nz = 0; 2221 for (i=0; i<m; i++) { 2222 nz += locrowlens[i]; 2223 } 2224 ierr = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 2225 mycols = ibuf; 2226 /* receive message of column indices*/ 2227 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2228 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2229 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2230 } 2231 2232 /* loop over local rows, determining number of off diagonal entries */ 2233 ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 2234 odlens = dlens + (rend-rstart); 2235 ierr = PetscMalloc(3*Mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr); 2236 ierr = PetscMemzero(mask,3*Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2237 masked1 = mask + Mbs; 2238 masked2 = masked1 + Mbs; 2239 rowcount = 0; nzcount = 0; 2240 for (i=0; i<mbs; i++) { 2241 dcount = 0; 2242 odcount = 0; 2243 for (j=0; j<bs; j++) { 2244 kmax = locrowlens[rowcount]; 2245 for (k=0; k<kmax; k++) { 2246 tmp = mycols[nzcount++]/bs; /* block col. index */ 2247 if (!mask[tmp]) { 2248 mask[tmp] = 1; 2249 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */ 2250 else masked1[dcount++] = tmp; /* entry in diag portion */ 2251 } 2252 } 2253 rowcount++; 2254 } 2255 2256 dlens[i] = dcount; /* d_nzz[i] */ 2257 odlens[i] = odcount; /* o_nzz[i] */ 2258 2259 /* zero out the mask elements we set */ 2260 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2261 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2262 } 2263 2264 /* create our matrix */ 2265 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 2266 ierr = MatSetSizes(A,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2267 ierr = MatSetType(A,type);CHKERRQ(ierr); 2268 ierr = MatSetOption(A,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 2269 ierr = MatMPISBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr); 2270 2271 if (!rank) { 2272 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2273 /* read in my part of the matrix numerical values */ 2274 nz = procsnz[0]; 2275 vals = buf; 2276 mycols = ibuf; 2277 if (size == 1) nz -= extra_rows; 2278 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2279 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2280 2281 /* insert into matrix */ 2282 jj = rstart*bs; 2283 for (i=0; i<m; i++) { 2284 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2285 mycols += locrowlens[i]; 2286 vals += locrowlens[i]; 2287 jj++; 2288 } 2289 2290 /* read in other processors (except the last one) and ship out */ 2291 for (i=1; i<size-1; i++) { 2292 nz = procsnz[i]; 2293 vals = buf; 2294 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2295 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr); 2296 } 2297 /* the last proc */ 2298 if (size != 1){ 2299 nz = procsnz[i] - extra_rows; 2300 vals = buf; 2301 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2302 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2303 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)A)->tag,comm);CHKERRQ(ierr); 2304 } 2305 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2306 2307 } else { 2308 /* receive numeric values */ 2309 ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2310 2311 /* receive message of values*/ 2312 vals = buf; 2313 mycols = ibuf; 2314 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr); 2315 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2316 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2317 2318 /* insert into matrix */ 2319 jj = rstart*bs; 2320 for (i=0; i<m; i++) { 2321 ierr = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2322 mycols += locrowlens[i]; 2323 vals += locrowlens[i]; 2324 jj++; 2325 } 2326 } 2327 2328 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2329 ierr = PetscFree(buf);CHKERRQ(ierr); 2330 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2331 ierr = PetscFree(rowners);CHKERRQ(ierr); 2332 ierr = PetscFree(dlens);CHKERRQ(ierr); 2333 ierr = PetscFree(mask);CHKERRQ(ierr); 2334 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2335 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2336 *newmat = A; 2337 PetscFunctionReturn(0); 2338 } 2339 2340 #undef __FUNCT__ 2341 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor" 2342 /*XXXXX@ 2343 MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2344 2345 Input Parameters: 2346 . mat - the matrix 2347 . fact - factor 2348 2349 Collective on Mat 2350 2351 Level: advanced 2352 2353 Notes: 2354 This can also be set by the command line option: -mat_use_hash_table fact 2355 2356 .keywords: matrix, hashtable, factor, HT 2357 2358 .seealso: MatSetOption() 2359 @XXXXX*/ 2360 2361 2362 #undef __FUNCT__ 2363 #define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ" 2364 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[]) 2365 { 2366 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 2367 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(a->B)->data; 2368 PetscReal atmp; 2369 PetscReal *work,*svalues,*rvalues; 2370 PetscErrorCode ierr; 2371 PetscInt i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol; 2372 PetscMPIInt rank,size; 2373 PetscInt *rowners_bs,dest,count,source; 2374 PetscScalar *va; 2375 MatScalar *ba; 2376 MPI_Status stat; 2377 2378 PetscFunctionBegin; 2379 if (idx) SETERRQ(PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov"); 2380 ierr = MatGetRowMaxAbs(a->A,v,PETSC_NULL);CHKERRQ(ierr); 2381 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2382 2383 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 2384 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 2385 2386 bs = A->rmap->bs; 2387 mbs = a->mbs; 2388 Mbs = a->Mbs; 2389 ba = b->a; 2390 bi = b->i; 2391 bj = b->j; 2392 2393 /* find ownerships */ 2394 rowners_bs = A->rmap->range; 2395 2396 /* each proc creates an array to be distributed */ 2397 ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr); 2398 ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr); 2399 2400 /* row_max for B */ 2401 if (rank != size-1){ 2402 for (i=0; i<mbs; i++) { 2403 ncols = bi[1] - bi[0]; bi++; 2404 brow = bs*i; 2405 for (j=0; j<ncols; j++){ 2406 bcol = bs*(*bj); 2407 for (kcol=0; kcol<bs; kcol++){ 2408 col = bcol + kcol; /* local col index */ 2409 col += rowners_bs[rank+1]; /* global col index */ 2410 for (krow=0; krow<bs; krow++){ 2411 atmp = PetscAbsScalar(*ba); ba++; 2412 row = brow + krow; /* local row index */ 2413 if (PetscRealPart(va[row]) < atmp) va[row] = atmp; 2414 if (work[col] < atmp) work[col] = atmp; 2415 } 2416 } 2417 bj++; 2418 } 2419 } 2420 2421 /* send values to its owners */ 2422 for (dest=rank+1; dest<size; dest++){ 2423 svalues = work + rowners_bs[dest]; 2424 count = rowners_bs[dest+1]-rowners_bs[dest]; 2425 ierr = MPI_Send(svalues,count,MPIU_REAL,dest,rank,((PetscObject)A)->comm);CHKERRQ(ierr); 2426 } 2427 } 2428 2429 /* receive values */ 2430 if (rank){ 2431 rvalues = work; 2432 count = rowners_bs[rank+1]-rowners_bs[rank]; 2433 for (source=0; source<rank; source++){ 2434 ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,((PetscObject)A)->comm,&stat);CHKERRQ(ierr); 2435 /* process values */ 2436 for (i=0; i<count; i++){ 2437 if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i]; 2438 } 2439 } 2440 } 2441 2442 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2443 ierr = PetscFree(work);CHKERRQ(ierr); 2444 PetscFunctionReturn(0); 2445 } 2446 2447 #undef __FUNCT__ 2448 #define __FUNCT__ "MatSOR_MPISBAIJ" 2449 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2450 { 2451 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2452 PetscErrorCode ierr; 2453 PetscInt mbs=mat->mbs,bs=matin->rmap->bs; 2454 PetscScalar *x,*b,*ptr,*from; 2455 Vec bb1; 2456 2457 PetscFunctionBegin; 2458 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 2459 if (bs > 1) SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2460 2461 if (flag == SOR_APPLY_UPPER) { 2462 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2463 PetscFunctionReturn(0); 2464 } 2465 2466 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2467 if ( flag & SOR_ZERO_INITIAL_GUESS ) { 2468 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2469 its--; 2470 } 2471 2472 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2473 while (its--){ 2474 2475 /* lower triangular part: slvec0b = - B^T*xx */ 2476 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2477 2478 /* copy xx into slvec0a */ 2479 ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2480 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2481 ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2482 ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2483 2484 ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr); 2485 2486 /* copy bb into slvec1a */ 2487 ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2488 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 2489 ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2490 ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2491 2492 /* set slvec1b = 0 */ 2493 ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr); 2494 2495 ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2496 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2497 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 2498 ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2499 2500 /* upper triangular part: bb1 = bb1 - B*x */ 2501 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr); 2502 2503 /* local diagonal sweep */ 2504 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2505 } 2506 ierr = VecDestroy(bb1);CHKERRQ(ierr); 2507 } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)){ 2508 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2509 } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)){ 2510 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2511 } else if (flag & SOR_EISENSTAT) { 2512 Vec xx1; 2513 PetscTruth hasop; 2514 const PetscScalar *diag; 2515 PetscScalar *sl,scale = (omega - 2.0)/omega; 2516 PetscInt i,n; 2517 2518 if (!mat->xx1) { 2519 ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr); 2520 ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr); 2521 } 2522 xx1 = mat->xx1; 2523 bb1 = mat->bb1; 2524 2525 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 2526 2527 if (!mat->diag) { 2528 /* this is wrong for same matrix with new nonzero values */ 2529 ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr); 2530 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 2531 } 2532 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 2533 2534 if (hasop) { 2535 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 2536 ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr); 2537 } else { 2538 /* 2539 These two lines are replaced by code that may be a bit faster for a good compiler 2540 ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr); 2541 ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr); 2542 */ 2543 ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr); 2544 ierr = VecGetArray(mat->diag,(PetscScalar**)&diag);CHKERRQ(ierr); 2545 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 2546 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2547 ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr); 2548 if (omega == 1.0) { 2549 for (i=0; i<n; i++) { 2550 sl[i] = b[i] - diag[i]*x[i]; 2551 } 2552 ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr); 2553 } else { 2554 for (i=0; i<n; i++) { 2555 sl[i] = b[i] + scale*diag[i]*x[i]; 2556 } 2557 ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr); 2558 } 2559 ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr); 2560 ierr = VecRestoreArray(mat->diag,(PetscScalar**)&diag);CHKERRQ(ierr); 2561 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 2562 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2563 } 2564 2565 /* multiply off-diagonal portion of matrix */ 2566 ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr); 2567 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2568 ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr); 2569 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2570 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2571 ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr); 2572 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2573 ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2574 ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2575 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr); 2576 2577 /* local sweep */ 2578 ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 2579 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 2580 } else { 2581 SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2582 } 2583 PetscFunctionReturn(0); 2584 } 2585 2586 #undef __FUNCT__ 2587 #define __FUNCT__ "MatSOR_MPISBAIJ_2comm" 2588 PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2589 { 2590 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2591 PetscErrorCode ierr; 2592 Vec lvec1,bb1; 2593 2594 PetscFunctionBegin; 2595 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 2596 if (matin->rmap->bs > 1) SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2597 2598 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2599 if ( flag & SOR_ZERO_INITIAL_GUESS ) { 2600 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2601 its--; 2602 } 2603 2604 ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr); 2605 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2606 while (its--){ 2607 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2608 2609 /* lower diagonal part: bb1 = bb - B^T*xx */ 2610 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr); 2611 ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr); 2612 2613 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2614 ierr = VecCopy(bb,bb1);CHKERRQ(ierr); 2615 ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2616 2617 /* upper diagonal part: bb1 = bb1 - B*x */ 2618 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2619 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 2620 2621 ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2622 2623 /* diagonal sweep */ 2624 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2625 } 2626 ierr = VecDestroy(lvec1);CHKERRQ(ierr); 2627 ierr = VecDestroy(bb1);CHKERRQ(ierr); 2628 } else { 2629 SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2630 } 2631 PetscFunctionReturn(0); 2632 } 2633 2634