#define PETSCMAT_DLL

#include "../src/mat/impls/baij/mpi/mpibaij.h"   /*I  "petscmat.h"  I*/
#include "mpisbaij.h"
#include "../src/mat/impls/sbaij/seq/sbaij.h"

EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat);
EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat);
EXTERN PetscErrorCode DisAssemble_MPISBAIJ(Mat);
EXTERN PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt);
EXTERN PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar[]);
EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar[]);
EXTERN PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
EXTERN PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
EXTERN PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*);
EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar*);
EXTERN PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat,Vec,PetscInt[]);
EXTERN PetscErrorCode MatSOR_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec);

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPISBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#define CHUNKSIZE 10

#define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
{ \
  brow = row/bs;  \
  rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
  rmax = aimax[brow]; nrow = ailen[brow]; \
  bcol = col/bs; \
  ridx = row % bs; cidx = col % bs; \
  low  = 0; high = nrow; \
  while (high-low > 3) { \
    t = (low+high)/2; \
    if (rp[t] > bcol) high = t; \
    else              low  = t; \
  } \
  for (_i=low; _i<high; _i++) { \
    if (rp[_i] > bcol) break; \
    if (rp[_i] == bcol) { \
      bap = ap + bs2*_i + bs*cidx + ridx; \
      if (addv == ADD_VALUES) *bap += value;  \
      else                    *bap  = value; \
      goto a_noinsert; \
    } \
  } \
  if (a->nonew == 1) goto a_noinsert; \
  if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
  MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
  N = nrow++ - 1;  \
  /* shift up all the later entries in this row */ \
  for (ii=N; ii>=_i; ii--) { \
    rp[ii+1] = rp[ii]; \
    ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
  } \
  if (N >= _i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \
  rp[_i]                      = bcol;  \
  ap[bs2*_i + bs*cidx + ridx] = value;  \
  a_noinsert:; \
  ailen[brow] = nrow; \
}
#define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
{ \
  brow = row/bs;  \
  rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
  rmax = bimax[brow]; nrow = bilen[brow]; \
  bcol = col/bs; \
  ridx = row % bs; cidx = col % bs; \
  low  = 0; high = nrow; \
  while (high-low > 3) { \
    t = (low+high)/2; \
    if (rp[t] > bcol) high = t; \
    else              low  = t; \
  } \
  for (_i=low; _i<high; _i++) { \
    if (rp[_i] > bcol) break; \
    if (rp[_i] == bcol) { \
      bap = ap + bs2*_i + bs*cidx + ridx; \
      if (addv == ADD_VALUES) *bap += value;  \
      else                    *bap  = value; \
      goto b_noinsert; \
    } \
  } \
  if (b->nonew == 1) goto b_noinsert; \
  if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
  MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
  N = nrow++ - 1;  \
  /* shift up all the later entries in this row */ \
  for (ii=N; ii>=_i; ii--) { \
    rp[ii+1] = rp[ii]; \
    ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
  } \
  if (N >= _i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \
  rp[_i]                      = bcol;  \
  ap[bs2*_i + bs*cidx + ridx] = value;  \
  b_noinsert:; \
  bilen[brow] = nrow; \
}
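/*
   Usage sketch (illustrative only, not part of the original source): MATMPISBAIJ keeps
   only the block upper triangle, so callers normally set entries with i <= j and either
   let lower-triangular input be dropped or request that explicitly.  Assuming a
   previously created and preallocated matrix "mat" with ownership range [rstart,rend)
   and global size N, an assembly loop might read

       ierr = MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
       for (i=rstart; i<rend; i++) {
         for (j=i; j<N; j++) {
           v    = matrix_entry(i,j);
           ierr = MatSetValues(mat,1,&i,1,&j,&v,INSERT_VALUES);CHKERRQ(ierr);
         }
       }
       ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

   Here matrix_entry() is a hypothetical user function; i, j, rstart, rend, N are
   PetscInt and v is a PetscScalar.
*/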
/* Only add/insert a(i,j) with i<=j (blocks).
   Any a(i,j) with i>j input by the user is ignored.
*/
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPISBAIJ"
PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  MatScalar      value;
  PetscTruth     roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig=mat->cmap->rend,bs=mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat          A = baij->A;
  Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data;
  PetscInt     *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar    *aa=a->a;

  Mat         B = baij->B;
  Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data;
  PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba=b->a;

  PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar *ap,*bap;

  /* for stash */
  PetscInt  n_loc, *in_loc = PETSC_NULL;
  MatScalar *v_loc = PETSC_NULL;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  if (!baij->donotstash) {
    if (n > baij->n_loc) {
      ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
      ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
      ierr = PetscMalloc(n*sizeof(PetscInt),&baij->in_loc);CHKERRQ(ierr);
      ierr = PetscMalloc(n*sizeof(MatScalar),&baij->v_loc);CHKERRQ(ierr);

      baij->n_loc = n;
    }
    in_loc = baij->in_loc;
    v_loc  = baij->v_loc;
  }

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
      row = im[i] - rstart_orig;                     /* local row index */
      for (j=0; j<n; j++) {
        if (im[i]/bs > in[j]/bs) {
          if (a->ignore_ltriangular) {
            continue; /* ignore lower triangular blocks */
          } else {
            SETERRQ(PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
          }
        }
        if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */
          col  = in[j] - cstart_orig;                    /* local col index */
          brow = row/bs; bcol = col/bs;
          if (brow > bcol) continue; /* ignore lower triangular blocks of A */
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
#endif
        else { /* off-diag entry (B) */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
              B     = baij->B;
              b     = (Mat_SeqBAIJ*)(B)->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              ba    = b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else { /* off processor entry */
      if (!baij->donotstash) {
        n_loc = 0;
        for (j=0; j<n; j++) {
          if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
          in_loc[n_loc] = in[j];
          if (roworiented) {
            v_loc[n_loc] = v[i*n+j];
          } else {
            v_loc[n_loc] = v[j*m+i];
          }
          n_loc++;
        }
        ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray=baij->barray;
  PetscTruth      roworiented = baij->roworiented;
  PetscErrorCode  ierr;
  PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt        rend=baij->rendbs,cstart=baij->rstartbs,stepval;
  PetscInt        cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++ = *value++;
            }
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_DEBUG)
#if defined(PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            }
          } else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
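/*
   Illustrative sketch (not in the original source): MatSetValuesBlocked() addresses the
   matrix by block indices, so with block size bs a single call deposits a dense bs x bs
   block.  Assuming a hypothetical bs*bs work array "blk" filled row-by-row and a block
   row/column pair (ib,jb) with ib <= jb:

       PetscInt    ib,jb;
       PetscScalar blk[MAX_BS*MAX_BS];
       ierr = MatSetValuesBlocked(mat,1,&ib,1,&jb,blk,ADD_VALUES);CHKERRQ(ierr);

   (MAX_BS is a hypothetical compile-time bound on the block size.)  The values land in
   baij->A when jb falls in the diagonal block-column range and in baij->B (stored as
   MATSEQBAIJ) otherwise.
*/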
#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPISBAIJ"
PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          col  = idxn[j] - bscstart;
          ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPISBAIJ"
PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscReal      sum[2],*lnorm2;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      ierr    = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
      ierr    = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of norm(A) */
      ierr    = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;            /* square of norm(B) */
      ierr    = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *norm   = sqrt(sum[0] + 2*sum[1]);
      ierr    = PetscFree(lnorm2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
      Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
      Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
      PetscReal    *rsum,*rsum2,vabs;
      PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
      PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
      MatScalar    *v;

      ierr = PetscMalloc2(mat->cmap->N,PetscReal,&rsum,mat->cmap->N,PetscReal,&rsum2);CHKERRQ(ierr);
      ierr = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      /* Amat */
      v = amat->a; jj = amat->j;
      for (brow=0; brow<mbs; brow++) {
        grow = bs*(rstart + brow);
        nz   = amat->i[brow+1] - amat->i[brow];
        for (bcol=0; bcol<nz; bcol++) {
          gcol = bs*(rstart + *jj); jj++;
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              vabs            = PetscAbsScalar(*v); v++;
              rsum[gcol+col] += vabs;
              /* non-diagonal block */
              if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
            }
          }
        }
      }
      /* Bmat */
      v = bmat->a; jj = bmat->j;
      for (brow=0; brow<mbs; brow++) {
        grow = bs*(rstart + brow);
        nz   = bmat->i[brow+1] - bmat->i[brow];
        for (bcol=0; bcol<nz; bcol++) {
          gcol = bs*garray[*jj]; jj++;
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              vabs            = PetscAbsScalar(*v); v++;
              rsum[gcol+col] += vabs;
              rsum[grow+row] += vabs;
            }
          }
        }
      }
      ierr  = MPI_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *norm = 0.0;
      for (col=0; col<mat->cmap->N; col++) {
        if (rsum2[col] > *norm) *norm = rsum2[col];
      }
      ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
    }
  }
  PetscFunctionReturn(0);
}
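/*
   Note on the Frobenius branch above (explanatory, not in the original source): each
   process stores its diagonal block A_p, whose SBAIJ symmetry is already accounted for
   by the sequential MatNorm(), while the off-diagonal part B_p appears only once even
   though its transpose is also logically present in the symmetric matrix.  The
   reduction therefore computes

       norm_F(M) = sqrt( sum_p ( norm_F(A_p)^2 + 2 * norm_F(B_p)^2 ) )

   which is why sum[1] (the B contribution) is doubled before the square root.
*/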
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (baij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  /* query the block stash here (the original queried mat->stash twice) */
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
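/*
   Illustrative note (not in the original source): entries destined for rows owned by
   another process are cached in mat->stash and mat->bstash by MatSetValues() and
   MatSetValuesBlocked() above; the scatter started in MatAssemblyBegin_MPISBAIJ() is
   drained in MatAssemblyEnd_MPISBAIJ() below.  A caller therefore simply writes

       ierr = MatSetValues(mat,1,&i,1,&j,&v,ADD_VALUES);CHKERRQ(ierr);  (any row i, even off-process)
       ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);   (starts the stash scatters)
       ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);     (drains both stashes)

   regardless of which process generated each entry.
*/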
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
  Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscTruth     other_disassembled;
  PetscMPIInt    n;
  PetscTruth     r1,r2,r3;
  MatScalar      *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!baij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash.  Since the values are stashed column-oriented,
       clear the row-oriented flags before MatSetValuesBlocked() and
       restore the original flags afterwards */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */

    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);

    baij->roworiented = r1;
    a->roworiented    = r2;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
  }
  ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  baij->rowvalues = 0;
  PetscFunctionReturn(0);
}
extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);

#undef __FUNCT__
#define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscInt          bs = mat->rmap->bs;
  PetscMPIInt       size = baij->size,rank = baij->rank;
  PetscTruth        iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
             rank,mat->rmap->N,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
             mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    Mat_SeqSBAIJ *Aloc;
    Mat_SeqBAIJ  *Bloc;
    PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar    *a;

    /* Should this be the same type as mat? */
    ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqSBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Bloc = (Mat_SeqBAIJ*)baij->B->data;
    ai   = Bloc->i; aj = Bloc->j; a = Bloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatView_MPISBAIJ"
PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (iascii || isdraw || issocket || isbinary) {
    ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPISBAIJ"
PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = MatDestroy(baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(baij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = PetscTableDestroy(baij->colmap);CHKERRQ(ierr);}
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  if (baij->lvec)  {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
  if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
  if (baij->slvec0) {
    ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr);
    ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr);
  }
  if (baij->slvec1) {
    ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr);
    ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr);
    ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr);
  }
  if (baij->sMvctx) {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);}
  ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree(baij->hd);CHKERRQ(ierr);
  if (baij->diag) {ierr = VecDestroy(baij->diag);CHKERRQ(ierr);}
  if (baij->bb1)  {ierr = VecDestroy(baij->bb1);CHKERRQ(ierr);}
  if (baij->xx1)  {ierr = VecDestroy(baij->xx1);CHKERRQ(ierr);}
#if defined(PETSC_USE_SCALAR_MAT_SINGLE)
  ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
  ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(baij);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
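/*
   Note on the products below (explanatory, not in the original source): with only the
   upper triangle stored, y = M x is assembled from three pieces on each process,

       y = A x  +  B x_ghost  +  B^T x ,

   where A is the local diagonal SBAIJ block and B the off-diagonal BAIJ part.  The
   slvec0/slvec1 work vectors and the sMvctx scatter combine the B^T contribution and
   the incoming ghost values in a single communication round; the _2comm variants below
   instead use two scatters with the conventional lvec/Mvctx machinery.
*/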
PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 790 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 791 PetscFunctionReturn(0); 792 } 793 794 #undef __FUNCT__ 795 #define __FUNCT__ "MatMult_MPISBAIJ_Hermitian" 796 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy) 797 { 798 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 799 PetscErrorCode ierr; 800 PetscInt nt,mbs=a->mbs,bs=A->rmap->bs; 801 PetscScalar *x,*from; 802 803 PetscFunctionBegin; 804 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 805 if (nt != A->cmap->n) { 806 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 807 } 808 809 /* diagonal part */ 810 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 811 ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr); 812 813 /* subdiagonal part */ 814 ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 815 816 /* copy x into the vec slvec0 */ 817 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 818 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 819 820 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 821 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 822 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 823 824 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 825 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 826 /* supperdiagonal part */ 827 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 828 PetscFunctionReturn(0); 829 } 830 831 #undef __FUNCT__ 832 #define __FUNCT__ "MatMult_MPISBAIJ" 833 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy) 834 { 835 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 836 PetscErrorCode ierr; 837 PetscInt nt,mbs=a->mbs,bs=A->rmap->bs; 838 PetscScalar *x,*from; 839 840 PetscFunctionBegin; 841 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 842 if (nt != A->cmap->n) { 843 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 844 } 845 846 /* diagonal part */ 847 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 848 ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr); 849 850 /* subdiagonal part */ 851 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 852 853 /* copy x into the vec slvec0 */ 854 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 855 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 856 857 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 858 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 859 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 860 861 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 862 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 863 /* supperdiagonal part */ 864 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 865 PetscFunctionReturn(0); 866 } 867 868 #undef __FUNCT__ 869 #define __FUNCT__ "MatMult_MPISBAIJ_2comm" 870 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy) 871 { 872 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 873 PetscErrorCode ierr; 874 PetscInt nt; 875 876 PetscFunctionBegin; 877 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 878 if (nt != A->cmap->n) { 879 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 880 } 881 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 882 if (nt != A->rmap->N) { 883 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of 
A and yy"); 884 } 885 886 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 887 /* do diagonal part */ 888 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 889 /* do supperdiagonal part */ 890 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 891 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 892 /* do subdiagonal part */ 893 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 894 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 895 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 896 897 PetscFunctionReturn(0); 898 } 899 900 #undef __FUNCT__ 901 #define __FUNCT__ "MatMultAdd_MPISBAIJ" 902 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 903 { 904 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 905 PetscErrorCode ierr; 906 PetscInt mbs=a->mbs,bs=A->rmap->bs; 907 PetscScalar *x,*from,zero=0.0; 908 909 PetscFunctionBegin; 910 /* 911 PetscSynchronizedPrintf(((PetscObject)A)->comm," MatMultAdd is called ...\n"); 912 PetscSynchronizedFlush(((PetscObject)A)->comm); 913 */ 914 /* diagonal part */ 915 ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr); 916 ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr); 917 918 /* subdiagonal part */ 919 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 920 921 /* copy x into the vec slvec0 */ 922 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 923 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 924 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 925 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 926 927 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 928 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 929 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 930 931 /* supperdiagonal part */ 932 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr); 933 934 PetscFunctionReturn(0); 935 } 936 937 #undef __FUNCT__ 938 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm" 939 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz) 940 { 941 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 942 PetscErrorCode ierr; 943 944 PetscFunctionBegin; 945 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 946 /* do diagonal part */ 947 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 948 /* do supperdiagonal part */ 949 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 950 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 951 952 /* do subdiagonal part */ 953 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 954 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 955 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 956 957 PetscFunctionReturn(0); 958 } 959 960 /* 961 This only works correctly for square matrices where the subblock A->A is the 962 diagonal block 963 */ 964 #undef __FUNCT__ 965 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ" 966 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v) 967 { 968 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 969 PetscErrorCode ierr; 970 971 PetscFunctionBegin; 972 /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */ 973 ierr = 
#undef __FUNCT__
#define __FUNCT__ "MatScale_MPISBAIJ"
PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPISBAIJ"
PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
       allocate enough space to hold information from the longest row.
    */
    Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
    Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt     max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr);
  }

  if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows");
  lrow = row - brstart;  /* local row index */

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPISBAIJ"
PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;

  PetscFunctionBegin;
  if (!baij->getrowactive) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
  }
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ"
PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;

  PetscFunctionBegin;
  aA->getrow_utriangular = PETSC_TRUE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ"
PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;

  PetscFunctionBegin;
  aA->getrow_utriangular = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRealPart_MPISBAIJ"
PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  ierr = MatRealPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatImaginaryPart_MPISBAIJ"
PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPISBAIJ"
PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
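/*
   Usage sketch (illustrative, not in the original source): because a row of an SBAIJ
   matrix only exposes the stored upper-triangular entries, MatGetRow() on this type is
   normally bracketed by MatGetRowUpperTriangular()/MatRestoreRowUpperTriangular(),
   which toggle the getrow_utriangular flag handled above:

       PetscInt    ncols,*cols;
       PetscScalar *vals;
       ierr = MatGetRowUpperTriangular(mat);CHKERRQ(ierr);
       ierr = MatGetRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       ... use ncols, cols, vals for the locally owned row ...
       ierr = MatRestoreRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       ierr = MatRestoreRowUpperTriangular(mat);CHKERRQ(ierr);

   Only rows in the local ownership range may be requested, as enforced by
   MatGetRow_MPISBAIJ() above.
*/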
#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPISBAIJ"
PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
  Mat            A = a->A,B = a->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = (PetscReal)matin->rmap->bs;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else {
    SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPISBAIJ"
PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscTruth flg)
{
  Mat_MPISBAIJ   *a  = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = flg;
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    a->ht_flag = flg;
    break;
  case MAT_HERMITIAN:
    if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first");
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    A->ops->mult = MatMult_MPISBAIJ_Hermitian;
    break;
  case MAT_SYMMETRIC:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_STRUCTURALLY_SYMMETRIC:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_SYMMETRY_ETERNAL:
    if (!flg) SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_LOWER_TRIANGULAR:
    aA->ignore_ltriangular = flg;
    break;
  case MAT_ERROR_LOWER_TRIANGULAR:
    aA->ignore_ltriangular = flg;
    break;
  case MAT_GETROW_UPPERTRIANGULAR:
    aA->getrow_utriangular = flg;
    break;
  default:
    SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPISBAIJ"
PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* the original tested 'MAT_INITIAL_MATRIX || ...', which is always true; test reuse */
  if (reuse == MAT_INITIAL_MATRIX || *B != A) {
    ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
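/*
   Note (explanatory, not in the original source): for a symmetric matrix M^T = M, so
   MatTranspose above simply duplicates.  Likewise MatDiagonalScale below accepts only
   ll == rr, since D M D is the only diagonal scaling that preserves symmetry.  A
   typical symmetric (Jacobi-like) scaling therefore looks like

       ierr = MatDiagonalScale(mat,d,d);CHKERRQ(ierr);

   where d is a user-provided Vec passed on both sides.
*/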
same",m,n); 1288 1289 ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr); 1290 if (nv!=n) SETERRQ(PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size"); 1291 1292 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1293 1294 /* left diagonalscale the off-diagonal part */ 1295 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1296 1297 /* scale the diagonal part */ 1298 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1299 1300 /* right diagonalscale the off-diagonal part */ 1301 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1302 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1303 PetscFunctionReturn(0); 1304 } 1305 1306 #undef __FUNCT__ 1307 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ" 1308 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A) 1309 { 1310 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1311 PetscErrorCode ierr; 1312 1313 PetscFunctionBegin; 1314 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1315 PetscFunctionReturn(0); 1316 } 1317 1318 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *); 1319 1320 #undef __FUNCT__ 1321 #define __FUNCT__ "MatEqual_MPISBAIJ" 1322 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag) 1323 { 1324 Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data; 1325 Mat a,b,c,d; 1326 PetscTruth flg; 1327 PetscErrorCode ierr; 1328 1329 PetscFunctionBegin; 1330 a = matA->A; b = matA->B; 1331 c = matB->A; d = matB->B; 1332 1333 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1334 if (flg) { 1335 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1336 } 1337 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1338 PetscFunctionReturn(0); 1339 } 1340 1341 #undef __FUNCT__ 1342 #define __FUNCT__ "MatCopy_MPISBAIJ" 1343 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str) 1344 { 1345 PetscErrorCode ierr; 1346 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1347 Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data; 1348 1349 PetscFunctionBegin; 1350 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
#undef __FUNCT__
#define __FUNCT__ "MatCopy_MPISBAIJ"
PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)B->data;

  PetscFunctionBegin;
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
    ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
    ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
  } else {
    ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
    ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ"
PetscErrorCode MatSetUpPreallocation_MPISBAIJ(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPISBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#include "petscblaslapack.h"
#undef __FUNCT__
#define __FUNCT__ "MatAXPY_MPISBAIJ"
PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data;
  PetscBLASInt   bnz,one=1;
  Mat_SeqSBAIJ   *xa,*ya;
  Mat_SeqBAIJ    *xb,*yb;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    xa  = (Mat_SeqSBAIJ*)xx->A->data;
    ya  = (Mat_SeqSBAIJ*)yy->A->data;
    bnz = PetscBLASIntCast(xa->nz);
    BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one);
    xb  = (Mat_SeqBAIJ*)xx->B->data;
    yb  = (Mat_SeqBAIJ*)yy->B->data;
    bnz = PetscBLASIntCast(xb->nz);
    BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one);
  } else {
    ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
    ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
    ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetBlockSize_MPISBAIJ"
PetscErrorCode MatSetBlockSize_MPISBAIJ(Mat A,PetscInt bs)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscInt       rbs,cbs;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
  ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
  ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr);
  ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr);
  if (rbs != bs) SETERRQ2(PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with SBAIJ %d",bs,rbs);
  if (cbs != bs) SETERRQ2(PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with SBAIJ %d",bs,cbs);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
{
  PetscErrorCode ierr;
  PetscInt       i;
  PetscTruth     flg;

  PetscFunctionBegin;
  for (i=0; i<n; i++) {
    ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
    if (!flg) {
      SETERRQ(PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices");
    }
  }
  ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = {
       MatSetValues_MPISBAIJ,
       MatGetRow_MPISBAIJ,
       MatRestoreRow_MPISBAIJ,
       MatMult_MPISBAIJ,
/* 4*/ MatMultAdd_MPISBAIJ,
       MatMult_MPISBAIJ,       /* transpose versions are same as non-transpose */
       MatMultAdd_MPISBAIJ,
       0,
       0,
       0,
/*10*/ 0,
       0,
       0,
       MatSOR_MPISBAIJ,
       MatTranspose_MPISBAIJ,
/*15*/ MatGetInfo_MPISBAIJ,
       MatEqual_MPISBAIJ,
       MatGetDiagonal_MPISBAIJ,
       MatDiagonalScale_MPISBAIJ,
       MatNorm_MPISBAIJ,
/*20*/ MatAssemblyBegin_MPISBAIJ,
       MatAssemblyEnd_MPISBAIJ,
       MatSetOption_MPISBAIJ,
       MatZeroEntries_MPISBAIJ,
/*24*/ 0,
       0,
       0,
       0,
       0,
/*29*/ MatSetUpPreallocation_MPISBAIJ,
       0,
       0,
       0,
       0,
/*34*/ MatDuplicate_MPISBAIJ,
       0,
       0,
       0,
       0,
/*39*/ MatAXPY_MPISBAIJ,
       MatGetSubMatrices_MPISBAIJ,
       MatIncreaseOverlap_MPISBAIJ,
       MatGetValues_MPISBAIJ,
       MatCopy_MPISBAIJ,
/*44*/ 0,
       MatScale_MPISBAIJ,
       0,
       0,
       0,
/*49*/ MatSetBlockSize_MPISBAIJ,
       0,
       0,
       0,
       0,
/*54*/ 0,
       0,
       MatSetUnfactored_MPISBAIJ,
       0,
       MatSetValuesBlocked_MPISBAIJ,
/*59*/ 0,
       0,
       0,
       0,
       0,
/*64*/ 0,
       0,
       0,
       0,
       0,
/*69*/ MatGetRowMaxAbs_MPISBAIJ,
       0,
       0,
       0,
       0,
/*74*/ 0,
       0,
       0,
       0,
       0,
/*79*/ 0,
       0,
       0,
       0,
       MatLoad_MPISBAIJ,
/*84*/ 0,
       0,
       0,
       0,
       0,
/*89*/ 0,
       0,
       0,
       0,
       0,
/*94*/ 0,
       0,
       0,
       0,
       0,
/*99*/ 0,
       0,
       0,
       0,
       0,
/*104*/0,
       MatRealPart_MPISBAIJ,
       MatImaginaryPart_MPISBAIJ,
       MatGetRowUpperTriangular_MPISBAIJ,
       MatRestoreRowUpperTriangular_MPISBAIJ
};

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
{
  PetscFunctionBegin;
  *a      = ((Mat_MPISBAIJ*)A->data)->A;
  *iscopy = PETSC_FALSE;
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
{
  PetscFunctionBegin;
  *a      = ((Mat_MPISBAIJ*)A->data)->A;
  *iscopy = PETSC_FALSE;
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
{
  Mat_MPISBAIJ   *b;
  PetscErrorCode ierr;
  PetscInt       i,mbs,Mbs,newbs = PetscAbs(bs);

  PetscFunctionBegin;
  if (bs < 0) {
    ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPISBAIJ matrix","Mat");CHKERRQ(ierr);
    ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscOptionsEnd();CHKERRQ(ierr);
    bs   = PetscAbs(bs);
  }
  if ((d_nnz || o_nnz) && newbs != bs) {
    SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz");
  }
  bs = newbs;

  if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
  if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
  if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
  if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);

  B->rmap->bs = B->cmap->bs = bs;
  ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);

  if (d_nnz) {
    for (i=0; i<B->rmap->n/bs; i++) {
      if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
    }
  }
  if (o_nnz) {
    for (i=0; i<B->rmap->n/bs; i++) {
      if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
    }
  }

  b   = (Mat_MPISBAIJ*)B->data;
  mbs = B->rmap->n/bs;
  Mbs = B->rmap->N/bs;
  if (mbs*bs != B->rmap->n) {
    SETERRQ2(PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->n,bs);
  }

  B->rmap->bs = bs;
  b->bs2      = bs*bs;
  b->mbs      = mbs;
  b->nbs      = mbs;
  b->Mbs      = Mbs;
  b->Nbs      = Mbs;

  for (i=0; i<=b->size; i++) {
    b->rangebs[i] = B->rmap->range[i]/bs;
  }
  b->rstartbs = B->rmap->rstart/bs;
  b->rendbs   = B->rmap->rend/bs;
  b->cstartbs = B->cmap->rstart/bs;
  b->cendbs   = B->cmap->rend/bs;

  if (!B->preallocated) {
    ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
    ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
    ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
    ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
    ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
    ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
    ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->bstash);CHKERRQ(ierr);
  }

  ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
  ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
  B->preallocated = PETSC_TRUE;
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#if defined(PETSC_HAVE_MUMPS)
extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_mpisbaij_mumps(Mat,MatFactorType,Mat*);
#endif
#if defined(PETSC_HAVE_SPOOLES)
extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_mpisbaij_spooles(Mat,MatFactorType,Mat*);
#endif
#if defined(PETSC_HAVE_PASTIX)
extern PetscErrorCode MatGetFactor_mpisbaij_pastix(Mat,MatFactorType,Mat*);
#endif
EXTERN_C_END

/*MC
   MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
   based on the block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.

   Options Database Keys:
. -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPISBAIJ
M*/
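
/*
   A minimal creation sketch for this type (comm, m and bs are illustrative
   placeholders, not names from this file):

     Mat A;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
     ierr = MatMPISBAIJSetPreallocation(A,bs,3,PETSC_NULL,1,PETSC_NULL);CHKERRQ(ierr);
*/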

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPISBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPISBAIJ(Mat B)
{
  Mat_MPISBAIJ   *b;
  PetscErrorCode ierr;
  PetscTruth     flg;

  PetscFunctionBegin;
  ierr    = PetscNewLog(B,Mat_MPISBAIJ,&b);CHKERRQ(ierr);
  B->data = (void*)b;
  ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  B->ops->destroy = MatDestroy_MPISBAIJ;
  B->ops->view    = MatView_MPISBAIJ;
  B->mapping      = 0;
  B->assembled    = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  ierr = PetscMalloc((b->size+2)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash  = PETSC_FALSE;
  b->colmap      = PETSC_NULL;
  b->garray      = PETSC_NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used in block assembly */
  b->barray = 0;

  /* stuff used for matrix vector multiply */
  b->lvec    = 0;
  b->Mvctx   = 0;
  b->slvec0  = 0;
  b->slvec0b = 0;
  b->slvec1  = 0;
  b->slvec1a = 0;
  b->slvec1b = 0;
  b->sMvctx  = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = 0;
  b->hd           = 0;
  b->ht_size      = 0;
  b->ht_flag      = PETSC_FALSE;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  b->in_loc = 0;
  b->v_loc  = 0;
  b->n_loc  = 0;
  ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsTruth("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
  if (flg) {
    PetscReal fact = 1.39;
    ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
    ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr);
    if (fact <= 1.0) fact = 1.39;
    ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
    ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
  }
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
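
  /* Compose the type-specific implementations below onto the object so that the
     generic interfaces (MatGetFactor(), MatStoreValues(), MatGetDiagonalBlock(),
     MatMPISBAIJSetPreallocation()) can dispatch to them by name at runtime.
     Descriptive note added; the mechanism is PetscObjectComposeFunctionDynamic(). */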
"MatStoreValues_MPISBAIJ", 1767 MatStoreValues_MPISBAIJ);CHKERRQ(ierr); 1768 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1769 "MatRetrieveValues_MPISBAIJ", 1770 MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr); 1771 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 1772 "MatGetDiagonalBlock_MPISBAIJ", 1773 MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr); 1774 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C", 1775 "MatMPISBAIJSetPreallocation_MPISBAIJ", 1776 MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr); 1777 B->symmetric = PETSC_TRUE; 1778 B->structurally_symmetric = PETSC_TRUE; 1779 B->symmetric_set = PETSC_TRUE; 1780 B->structurally_symmetric_set = PETSC_TRUE; 1781 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr); 1782 PetscFunctionReturn(0); 1783 } 1784 EXTERN_C_END 1785 1786 /*MC 1787 MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices. 1788 1789 This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator, 1790 and MATMPISBAIJ otherwise. 1791 1792 Options Database Keys: 1793 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions() 1794 1795 Level: beginner 1796 1797 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ 1798 M*/ 1799 1800 EXTERN_C_BEGIN 1801 #undef __FUNCT__ 1802 #define __FUNCT__ "MatCreate_SBAIJ" 1803 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SBAIJ(Mat A) 1804 { 1805 PetscErrorCode ierr; 1806 PetscMPIInt size; 1807 1808 PetscFunctionBegin; 1809 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 1810 if (size == 1) { 1811 ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr); 1812 } else { 1813 ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 1814 } 1815 PetscFunctionReturn(0); 1816 } 1817 EXTERN_C_END 1818 1819 #undef __FUNCT__ 1820 #define __FUNCT__ "MatMPISBAIJSetPreallocation" 1821 /*@C 1822 MatMPISBAIJSetPreallocation - For good matrix assembly performance 1823 the user should preallocate the matrix storage by setting the parameters 1824 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1825 performance can be increased by more than a factor of 50. 1826 1827 Collective on Mat 1828 1829 Input Parameters: 1830 + A - the matrix 1831 . bs - size of blockk 1832 . d_nz - number of block nonzeros per block row in diagonal portion of local 1833 submatrix (same for all local rows) 1834 . d_nnz - array containing the number of block nonzeros in the various block rows 1835 in the upper triangular and diagonal part of the in diagonal portion of the local 1836 (possibly different for each block row) or PETSC_NULL. You must leave room 1837 for the diagonal entry even if it is zero. 1838 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1839 submatrix (same for all local rows). 1840 - o_nnz - array containing the number of nonzeros in the various block rows of the 1841 off-diagonal portion of the local submatrix (possibly different for 1842 each block row) or PETSC_NULL. 1843 1844 1845 Options Database Keys: 1846 . -mat_no_unroll - uses code that does not unroll the loops in the 1847 block calculations (much slower) 1848 . -mat_block_size - size of the blocks to use 1849 1850 Notes: 1851 1852 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1853 than it must be used on all processors that share the object for that argument. 

#undef __FUNCT__
#define __FUNCT__ "MatMPISBAIJSetPreallocation"
/*@C
   MatMPISBAIJSetPreallocation - For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on Mat

   Input Parameters:
+  A - the matrix
.  bs - size of block
.  d_nz - number of block nonzeros per block row in the diagonal portion of the local
          submatrix (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the upper triangular and diagonal part of the diagonal portion of the
           local submatrix (possibly different for each block row), or PETSC_NULL.
           You must leave room for the diagonal entry even if it is zero.
.  o_nz - number of block nonzeros per block row in the off-diagonal portion of the local
          submatrix (same for all local rows)
-  o_nnz - array containing the number of nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row), or PETSC_NULL

   Options Database Keys:
+  -mat_no_unroll - uses code that does not unroll the loops in the
                    block calculations (much slower)
-  -mat_block_size - size of the blocks to use

   Notes:
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   If the *_nnz parameter is given then the *_nz parameter is ignored.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   You can call MatGetInfo() to get information on how effective the preallocation was;
   for example the fields mallocs, nz_allocated, nz_used, nz_unneeded.
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix.  In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          -------------------
   row 3  | o o o d d d o o o o o o
   row 4  | o o o d d d o o o o o o
   row 5  | o o o d d d o o o o o o
          -------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d matrix is stored in
   MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.

   Now d_nz should indicate the number of block nonzeros per row in the upper triangular
   plus the diagonal part of the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.  For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.
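
   As a worked illustration of the figure above (bs=1, and assuming every
   depicted location is actually nonzero): the stored upper triangle of the
   3x3 diagonal block gives d_nnz = {3,2,1}, and, since only columns to the
   right of the diagonal block are kept in the o matrix, o_nnz = {6,6,6}.
   (Numbers added as an example, not taken from the original text.)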

   Level: intermediate

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPISBAIJ"
/*@C
   MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
   (block compressed row).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs - size of block
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
       This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  d_nz - number of block nonzeros per block row in the diagonal portion of the local
          submatrix (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the upper triangular portion of the diagonal portion of the local
           submatrix (possibly different for each block row), or PETSC_NULL.
           You must leave room for the diagonal entry even if it is zero.
.  o_nz - number of block nonzeros per block row in the off-diagonal portion of the local
          submatrix (same for all local rows)
-  o_nnz - array containing the number of nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row), or PETSC_NULL

   Output Parameter:
.  A - the matrix

   Options Database Keys:
+  -mat_no_unroll - uses code that does not unroll the loops in the
                    block calculations (much slower)
.  -mat_block_size - size of the blocks to use
-  -mat_mpi - use the parallel matrix data structures even on one processor
              (defaults to using SeqBAIJ format on one processor)

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]

   Notes:
   The number of rows and columns must be divisible by the blocksize.
   This matrix type does not support complex Hermitian operations.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   If the *_nnz parameter is given then the *_nz parameter is ignored.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix.  In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          -------------------
   row 3  | o o o d d d o o o o o o
   row 4  | o o o d d d o o o o o o
   row 5  | o o o d d d o o o o o o
          -------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d matrix is stored in
   MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.

   Now d_nz should indicate the number of block nonzeros per row in the upper triangular
   plus the diagonal part of the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.  For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

   Level: intermediate

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPISBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
    ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
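
/*
   A usage sketch (the communicator, global size M and preallocation counts are
   illustrative, not taken from this file):

     Mat A;
     ierr = MatCreateMPISBAIJ(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DECIDE,M,M,
                              3,PETSC_NULL,1,PETSC_NULL,&A);CHKERRQ(ierr);
*/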

#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPISBAIJ"
static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
  PetscErrorCode ierr;
  PetscInt       len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs;
  PetscScalar    *array;

  PetscFunctionBegin;
  *newmat = 0;
  ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
  ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
  ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr);

  mat->factor       = matin->factor;
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;

  a = (Mat_MPISBAIJ*)mat->data;
  a->bs2 = oldmat->bs2;
  a->mbs = oldmat->mbs;
  a->nbs = oldmat->nbs;
  a->Mbs = oldmat->Mbs;
  a->Nbs = oldmat->Nbs;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;
  a->barray       = 0;
  a->rstartbs     = oldmat->rstartbs;
  a->rendbs       = oldmat->rendbs;
  a->cstartbs     = oldmat->cstartbs;
  a->cendbs       = oldmat->cendbs;

  /* hash table stuff */
  a->ht           = 0;
  a->hd           = 0;
  a->ht_size      = 0;
  a->ht_flag      = oldmat->ht_flag;
  a->ht_fact      = oldmat->ht_fact;
  a->ht_total_ct  = 0;
  a->ht_insert_ct = 0;

  ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
  if (oldmat->colmap) {
#if defined (PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;

  if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
    ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
  } else a->garray = 0;

  ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);

  ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
  ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);

  ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
  ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
  ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->slvec0b);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->slvec1a);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->slvec1b);CHKERRQ(ierr);

  /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
  ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
  a->sMvctx = oldmat->sMvctx;
  ierr = PetscLogObjectParent(mat,a->sMvctx);CHKERRQ(ierr);

  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
  ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "petscsys.h"
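
/* Descriptive note on the loader below: the PETSc binary file begins with a
   four-entry header; header[0] must be MAT_FILE_COOKIE, header[1] and header[2]
   hold the global row and column counts, and header[3] carries the nonzero
   count as written by PETSc binary viewers (a negative value flags a special
   format this loader rejects).  Row lengths, column indices and values follow,
   and rank 0 reads each piece and ships it to its owner. */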

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPISBAIJ"
PetscErrorCode MatLoad_MPISBAIJ(PetscViewer viewer,const MatType type,Mat *newmat)
{
  Mat            A;
  PetscErrorCode ierr;
  PetscInt       i,nz,j,rstart,rend;
  PetscScalar    *vals,*buf;
  MPI_Comm       comm = ((PetscObject)viewer)->comm;
  MPI_Status     status;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,*locrowlens,mmbs;
  PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
  PetscInt       *procsnz = 0,jj,*mycols,*ibuf;
  PetscInt       bs=1,Mbs,mbs,extra_rows;
  PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
  PetscInt       dcount,kmax,k,nzcount,tmp;
  int            fd;

  PetscFunctionBegin;
  ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscOptionsEnd();CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
    if (header[3] < 0) {
      SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
    }
  }

  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];

  if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");

  /*
     This code adds extra rows to make sure the number of rows is
     divisible by the blocksize
  */
  Mbs        = M/bs;
  extra_rows = bs - M + bs*(Mbs);
  if (extra_rows == bs) extra_rows = 0;
  else                  Mbs++;
  if (extra_rows && !rank) {
    ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
  }
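
  /* Worked example of the padding (numbers illustrative): for M = 10 and bs = 3,
     Mbs = 10/3 = 3 and extra_rows = 3 - 10 + 9 = 2, so Mbs becomes 4 and the
     matrix is treated as 12x12; each padded row later receives a single unit
     diagonal entry (see the 1.0 fills below). */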

  /* determine ownership of all rows */
  mbs = Mbs/size + ((Mbs % size) > rank);
  m   = mbs*bs;
  ierr = PetscMalloc2(size+1,PetscMPIInt,&rowners,size+1,PetscMPIInt,&browners);CHKERRQ(ierr);
  mmbs = PetscMPIIntCast(mbs);
  ierr = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
  for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscMPIInt),&locrowlens);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
    ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {   /* procs[0] */
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]*bs; j<rowners[i+1]*bs; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz     = procsnz[0];
    ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
    mycols = ibuf;
    if (size == 1) nz -= extra_rows;
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
    if (size == 1) for (i=0; i<extra_rows; i++) { mycols[nz+i] = M+i; }

    /* read in the column indices for each remaining processor (except the last) and ship them off */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    /* read in the stuff for the last proc */
    if (size != 1) {
      nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
      ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {  /* procs[i], i>0 */
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += locrowlens[i];
    }
    ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
    mycols = ibuf;
    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }
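
  /* The counting pass below builds the preallocation arrays that are handed to
     MatMPISBAIJSetPreallocation(): for each local block row it counts the
     distinct block columns that fall inside the diagonal block (dlens) and
     outside it (odlens); mask[] marks block columns already seen within the
     current block row and is reset before the next one.  Descriptive note added. */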

  /* loop over local rows, determining the number of diagonal and off-diagonal entries */
  ierr = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
  ierr = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
  ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
  rowcount = 0;
  nzcount  = 0;
  for (i=0; i<mbs; i++) {
    dcount  = 0;
    odcount = 0;
    for (j=0; j<bs; j++) {
      kmax = locrowlens[rowcount];
      for (k=0; k<kmax; k++) {
        tmp = mycols[nzcount++]/bs;  /* block col. index */
        if (!mask[tmp]) {
          mask[tmp] = 1;
          if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;  /* entry in off-diag portion */
          else masked1[dcount++] = tmp;                               /* entry in diag portion */
        }
      }
      rowcount++;
    }

    dlens[i]  = dcount;   /* d_nnz[i] */
    odlens[i] = odcount;  /* o_nnz[i] */

    /* zero out the mask elements we set */
    for (j=0; j<dcount; j++)  mask[masked1[j]] = 0;
    for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
  }

  /* create our matrix */
  ierr = MatCreate(comm,&A);CHKERRQ(ierr);
  ierr = MatSetSizes(A,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetType(A,type);CHKERRQ(ierr);
  ierr = MatSetOption(A,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
  ierr = MatMPISBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);

  if (!rank) {
    ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
    /* read in my part of the matrix numerical values */
    nz     = procsnz[0];
    vals   = buf;
    mycols = ibuf;
    if (size == 1) nz -= extra_rows;
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
    if (size == 1) for (i=0; i<extra_rows; i++) { vals[nz+i] = 1.0; }

    /* insert into matrix */
    jj = rstart*bs;
    for (i=0; i<m; i++) {
      ierr    = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }

    /* read in the values for the other processors (except the last one) and ship them out */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
    }
    /* the last proc */
    if (size != 1) {
      nz   = procsnz[i] - extra_rows;
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
      ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);

    /* receive message of values */
    vals   = buf;
    mycols = ibuf;
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj = rstart*bs;
    for (i=0; i<m; i++) {
      ierr    = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
  }

  ierr = PetscFree(locrowlens);CHKERRQ(ierr);
  ierr = PetscFree(buf);CHKERRQ(ierr);
  ierr = PetscFree(ibuf);CHKERRQ(ierr);
  ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
  ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
  ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = A;
  PetscFunctionReturn(0);
}
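
/*
   A loading sketch (the file name is illustrative; one would normally reach
   the routine above through the generic MatLoad() interface of this PETSc
   vintage, optionally with -matload_block_size setting bs):

     PetscViewer viewer;
     Mat         A;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatLoad(viewer,MATMPISBAIJ,&A);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
*/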

#undef __FUNCT__
#define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
/*XXXXX@
   MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.

   Input Parameters:
+  mat - the matrix
-  fact - factor

   Collective on Mat

   Level: advanced

   Notes:
   This can also be set by the command line option: -mat_use_hash_table fact

.keywords: matrix, hashtable, factor, HT

.seealso: MatSetOption()
@XXXXX*/

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ"
PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
  PetscReal      atmp;
  PetscReal      *work,*svalues,*rvalues;
  PetscErrorCode ierr;
  PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
  PetscMPIInt    rank,size;
  PetscInt       *rowners_bs,dest,count,source;
  PetscScalar    *va;
  MatScalar      *ba;
  MPI_Status     stat;

  PetscFunctionBegin;
  if (idx) SETERRQ(PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
  ierr = MatGetRowMaxAbs(a->A,v,PETSC_NULL);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);

  ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);

  bs  = A->rmap->bs;
  mbs = a->mbs;
  Mbs = a->Mbs;
  ba  = b->a;
  bi  = b->i;
  bj  = b->j;

  /* find ownerships */
  rowners_bs = A->rmap->range;

  /* each proc creates an array to be distributed */
  ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr);
  ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);

  /* row_max for B */
  if (rank != size-1) {
    for (i=0; i<mbs; i++) {
      ncols = bi[1] - bi[0]; bi++;
      brow  = bs*i;
      for (j=0; j<ncols; j++) {
        bcol = bs*(*bj);
        for (kcol=0; kcol<bs; kcol++) {
          col  = bcol + kcol;         /* local col index */
          col += rowners_bs[rank+1];  /* global col index */
          for (krow=0; krow<bs; krow++) {
            atmp = PetscAbsScalar(*ba); ba++;
            row  = brow + krow;       /* local row index */
            if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
            if (work[col] < atmp) work[col] = atmp;
          }
        }
        bj++;
      }
    }

    /* send values to their owners */
    for (dest=rank+1; dest<size; dest++) {
      svalues = work + rowners_bs[dest];
      count   = rowners_bs[dest+1] - rowners_bs[dest];
      ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,((PetscObject)A)->comm);CHKERRQ(ierr);
    }
  }

  /* receive values */
  if (rank) {
    rvalues = work;
    count   = rowners_bs[rank+1] - rowners_bs[rank];
    for (source=0; source<rank; source++) {
      ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,((PetscObject)A)->comm,&stat);CHKERRQ(ierr);
      /* process values */
      for (i=0; i<count; i++) {
        if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
      }
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = PetscFree(work);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
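
/* In the symmetric sweeps of MatSOR_MPISBAIJ below, the missing
   lower-triangular contribution is obtained with a single forward scatter:
   slvec0 holds -[x; B^T x] and is added into slvec1 = [b; 0], so one
   communication both accumulates -B^T x into the local right-hand side and
   delivers the ghost values of x needed for the upper-triangular product B*x.
   Descriptive note added. */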

#undef __FUNCT__
#define __FUNCT__ "MatSOR_MPISBAIJ"
PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
  PetscErrorCode ierr;
  PetscInt       mbs=mat->mbs,bs=matin->rmap->bs;
  PetscScalar    *x,*b,*ptr,*from;
  Vec            bb1;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
  if (bs > 1) SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");

  if (flag == SOR_APPLY_UPPER) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
    while (its--) {
      /* lower triangular part: slvec0b = - B^T*xx */
      ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);

      /* copy xx into slvec0a */
      ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
      ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
      ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
      ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);

      ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);

      /* copy bb into slvec1a */
      ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
      ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
      ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
      ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);

      /* set slvec1b = 0 */
      ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);

      ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
      ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* upper triangular part: bb1 = bb1 - B*x */
      ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);

      /* local diagonal sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
    ierr = VecDestroy(bb1);CHKERRQ(ierr);
  } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
  } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
  } else if (flag & SOR_EISENSTAT) {
    Vec               xx1;
    PetscTruth        hasop;
    const PetscScalar *diag;
    PetscScalar       *sl,scale = (omega - 2.0)/omega;
    PetscInt          i,n;
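
    /* Eisenstat's trick: fold the matrix-vector product into the SSOR
       preconditioner application so each relaxation touches the matrix entries
       only once; the factor scale = (omega-2)/omega weights the diagonal term
       in that splitting.  Descriptive note added. */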

    if (!mat->xx1) {
      ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr);
      ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr);
    }
    xx1 = mat->xx1;
    bb1 = mat->bb1;

    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);

    if (!mat->diag) {
      /* this is wrong for same matrix with new nonzero values */
      ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
      ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
    }
    ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);

    if (hasop) {
      ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
      ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
    } else {
      /*
         These two lines are replaced by code that may
         be a bit faster for a good compiler
        ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr);
        ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
      */
      ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr);
      ierr = VecGetArray(mat->diag,(PetscScalar**)&diag);CHKERRQ(ierr);
      ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
      ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
      ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr);
      if (omega == 1.0) {
        for (i=0; i<n; i++) {
          sl[i] = b[i] - diag[i]*x[i];
        }
        ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr);
      } else {
        for (i=0; i<n; i++) {
          sl[i] = b[i] + scale*diag[i]*x[i];
        }
        ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr);
      }
      ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr);
      ierr = VecRestoreArray(mat->diag,(PetscScalar**)&diag);CHKERRQ(ierr);
      ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
      ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
    }

    /* multiply off-diagonal portion of matrix */
    ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
    ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
    ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr);
    ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
    ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
    ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr);
    ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
    ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr);

    /* local sweep */
    ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
    ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
  }
  PetscFunctionReturn(0);
}
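
/* The _2comm variant below gathers the ghost values of x with a forward
   scatter and returns the lower-triangular product with a separate reverse
   scatter, i.e. two communication rounds per iteration where MatSOR_MPISBAIJ
   above needs only one.  Descriptive note added. */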

#undef __FUNCT__
#define __FUNCT__ "MatSOR_MPISBAIJ_2comm"
PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            lvec1,bb1;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
  if (matin->rmap->bs > 1) SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* lower triangular part: bb1 = bb - B^T*xx */
      ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
      ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);

      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
      ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

      /* upper triangular part: bb1 = bb1 - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);

      ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

      /* diagonal sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
    ierr = VecDestroy(lvec1);CHKERRQ(ierr);
    ierr = VecDestroy(bb1);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
  }
  PetscFunctionReturn(0);
}