1 #define PETSCMAT_DLL 2 3 #include "../src/mat/impls/baij/mpi/mpibaij.h" /*I "petscmat.h" I*/ 4 #include "mpisbaij.h" 5 #include "../src/mat/impls/sbaij/seq/sbaij.h" 6 7 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat); 8 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat); 9 EXTERN PetscErrorCode DisAssemble_MPISBAIJ(Mat); 10 EXTERN PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt); 11 EXTERN PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []); 12 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []); 13 EXTERN PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode); 14 EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 15 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 16 EXTERN PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**); 17 EXTERN PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**); 18 EXTERN PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*); 19 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *); 20 EXTERN PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat,Vec,PetscInt[]); 21 EXTERN PetscErrorCode MatSOR_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec); 22 23 EXTERN_C_BEGIN 24 #undef __FUNCT__ 25 #define __FUNCT__ "MatStoreValues_MPISBAIJ" 26 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPISBAIJ(Mat mat) 27 { 28 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 29 PetscErrorCode ierr; 30 31 PetscFunctionBegin; 32 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 33 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 34 PetscFunctionReturn(0); 35 } 36 EXTERN_C_END 37 38 EXTERN_C_BEGIN 39 #undef __FUNCT__ 40 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ" 41 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPISBAIJ(Mat mat) 42 { 43 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 44 PetscErrorCode ierr; 45 46 PetscFunctionBegin; 47 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 48 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 49 PetscFunctionReturn(0); 50 } 51 EXTERN_C_END 52 53 54 #define CHUNKSIZE 10 55 56 #define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \ 57 { \ 58 \ 59 brow = row/bs; \ 60 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 61 rmax = aimax[brow]; nrow = ailen[brow]; \ 62 bcol = col/bs; \ 63 ridx = row % bs; cidx = col % bs; \ 64 low = 0; high = nrow; \ 65 while (high-low > 3) { \ 66 t = (low+high)/2; \ 67 if (rp[t] > bcol) high = t; \ 68 else low = t; \ 69 } \ 70 for (_i=low; _i<high; _i++) { \ 71 if (rp[_i] > bcol) break; \ 72 if (rp[_i] == bcol) { \ 73 bap = ap + bs2*_i + bs*cidx + ridx; \ 74 if (addv == ADD_VALUES) *bap += value; \ 75 else *bap = value; \ 76 goto a_noinsert; \ 77 } \ 78 } \ 79 if (a->nonew == 1) goto a_noinsert; \ 80 if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 81 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 82 N = nrow++ - 1; \ 83 /* shift up all the later entries in this row */ \ 84 for (ii=N; ii>=_i; ii--) { \ 85 rp[ii+1] = rp[ii]; \ 86 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 87 } \ 88 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 89 rp[_i] = bcol; \ 90 ap[bs2*_i + bs*cidx + ridx] = value; \ 91 a_noinsert:; \ 92 ailen[brow] = nrow; \ 93 } 94 95 #define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \ 96 { \ 97 brow = row/bs; \ 98 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 99 rmax = bimax[brow]; nrow = bilen[brow]; \ 100 bcol = col/bs; \ 101 ridx = row % bs; cidx = col % bs; \ 102 low = 0; high = nrow; \ 103 while (high-low > 3) { \ 104 t = (low+high)/2; \ 105 if (rp[t] > bcol) high = t; \ 106 else low = t; \ 107 } \ 108 for (_i=low; _i<high; _i++) { \ 109 if (rp[_i] > bcol) break; \ 110 if (rp[_i] == bcol) { \ 111 bap = ap + bs2*_i + bs*cidx + ridx; \ 112 if (addv == ADD_VALUES) *bap += value; \ 113 else *bap = value; \ 114 goto b_noinsert; \ 115 } \ 116 } \ 117 if (b->nonew == 1) goto b_noinsert; \ 118 if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 119 MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 120 N = nrow++ - 1; \ 121 /* shift up all the later entries in this row */ \ 122 for (ii=N; ii>=_i; ii--) { \ 123 rp[ii+1] = rp[ii]; \ 124 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 125 } \ 126 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 127 rp[_i] = bcol; \ 128 ap[bs2*_i + bs*cidx + ridx] = value; \ 129 b_noinsert:; \ 130 bilen[brow] = nrow; \ 131 } 132 133 /* Only add/insert a(i,j) with i<=j (blocks). 134 Any a(i,j) with i>j input by user is ingored. 135 */ 136 #undef __FUNCT__ 137 #define __FUNCT__ "MatSetValues_MPISBAIJ" 138 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 139 { 140 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 141 MatScalar value; 142 PetscTruth roworiented = baij->roworiented; 143 PetscErrorCode ierr; 144 PetscInt i,j,row,col; 145 PetscInt rstart_orig=mat->rmap->rstart; 146 PetscInt rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart; 147 PetscInt cend_orig=mat->cmap->rend,bs=mat->rmap->bs; 148 149 /* Some Variables required in the macro */ 150 Mat A = baij->A; 151 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data; 152 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 153 MatScalar *aa=a->a; 154 155 Mat B = baij->B; 156 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 157 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 158 MatScalar *ba=b->a; 159 160 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 161 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 162 MatScalar *ap,*bap; 163 164 /* for stash */ 165 PetscInt n_loc, *in_loc = PETSC_NULL; 166 MatScalar *v_loc = PETSC_NULL; 167 168 PetscFunctionBegin; 169 if (v) PetscValidScalarPointer(v,6); 170 if (!baij->donotstash){ 171 if (n > baij->n_loc) { 172 ierr = PetscFree(baij->in_loc);CHKERRQ(ierr); 173 ierr = PetscFree(baij->v_loc);CHKERRQ(ierr); 174 ierr = PetscMalloc(n*sizeof(PetscInt),&baij->in_loc);CHKERRQ(ierr); 175 ierr = PetscMalloc(n*sizeof(MatScalar),&baij->v_loc);CHKERRQ(ierr); 176 baij->n_loc = n; 177 } 178 in_loc = baij->in_loc; 179 v_loc = baij->v_loc; 180 } 181 182 for (i=0; i<m; i++) { 183 if (im[i] < 0) continue; 184 #if defined(PETSC_USE_DEBUG) 185 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 186 #endif 187 if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */ 188 row = im[i] - rstart_orig; /* local row index */ 189 for (j=0; j<n; j++) { 190 if (im[i]/bs > in[j]/bs){ 191 if (a->ignore_ltriangular){ 192 continue; /* ignore lower triangular blocks */ 193 } else { 194 SETERRQ(PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 195 } 196 } 197 if (in[j] >= cstart_orig && in[j] < cend_orig){ /* diag entry (A) */ 198 col = in[j] - cstart_orig; /* local col index */ 199 brow = row/bs; bcol = col/bs; 200 if (brow > bcol) continue; /* ignore lower triangular blocks of A */ 201 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 202 MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv); 203 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 204 } else if (in[j] < 0) continue; 205 #if defined(PETSC_USE_DEBUG) 206 else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);} 207 #endif 208 else { /* off-diag entry (B) */ 209 if (mat->was_assembled) { 210 if (!baij->colmap) { 211 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 212 } 213 #if defined (PETSC_USE_CTABLE) 214 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 215 col = col - 1; 216 #else 217 col = baij->colmap[in[j]/bs] - 1; 218 #endif 219 if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) { 220 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 221 col = in[j]; 222 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 223 B = baij->B; 224 b = (Mat_SeqBAIJ*)(B)->data; 225 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 226 ba=b->a; 227 } else col += in[j]%bs; 228 } else col = in[j]; 229 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 230 MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv); 231 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 232 } 233 } 234 } else { /* off processor entry */ 235 if (!baij->donotstash) { 236 n_loc = 0; 237 for (j=0; j<n; j++){ 238 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 239 in_loc[n_loc] = in[j]; 240 if (roworiented) { 241 v_loc[n_loc] = v[i*n+j]; 242 } else { 243 v_loc[n_loc] = v[j*m+i]; 244 } 245 n_loc++; 246 } 247 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr); 248 } 249 } 250 } 251 PetscFunctionReturn(0); 252 } 253 254 #undef __FUNCT__ 255 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ" 256 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 257 { 258 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 259 const MatScalar *value; 260 MatScalar *barray=baij->barray; 261 PetscTruth roworiented = baij->roworiented; 262 PetscErrorCode ierr; 263 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 264 PetscInt rend=baij->rendbs,cstart=baij->rstartbs,stepval; 265 PetscInt cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2; 266 267 PetscFunctionBegin; 268 if(!barray) { 269 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 270 baij->barray = barray; 271 } 272 273 if (roworiented) { 274 stepval = (n-1)*bs; 275 } else { 276 stepval = (m-1)*bs; 277 } 278 for (i=0; i<m; i++) { 279 if (im[i] < 0) continue; 280 #if defined(PETSC_USE_DEBUG) 281 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 282 #endif 283 if (im[i] >= rstart && im[i] < rend) { 284 row = im[i] - rstart; 285 for (j=0; j<n; j++) { 286 /* If NumCol = 1 then a copy is not required */ 287 if ((roworiented) && (n == 1)) { 288 barray = (MatScalar*) v + i*bs2; 289 } else if((!roworiented) && (m == 1)) { 290 barray = (MatScalar*) v + j*bs2; 291 } else { /* Here a copy is required */ 292 if (roworiented) { 293 value = v + i*(stepval+bs)*bs + j*bs; 294 } else { 295 value = v + j*(stepval+bs)*bs + i*bs; 296 } 297 for (ii=0; ii<bs; ii++,value+=stepval) { 298 for (jj=0; jj<bs; jj++) { 299 *barray++ = *value++; 300 } 301 } 302 barray -=bs2; 303 } 304 305 if (in[j] >= cstart && in[j] < cend){ 306 col = in[j] - cstart; 307 ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 308 } 309 else if (in[j] < 0) continue; 310 #if defined(PETSC_USE_DEBUG) 311 else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);} 312 #endif 313 else { 314 if (mat->was_assembled) { 315 if (!baij->colmap) { 316 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 317 } 318 319 #if defined(PETSC_USE_DEBUG) 320 #if defined (PETSC_USE_CTABLE) 321 { PetscInt data; 322 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 323 if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 324 } 325 #else 326 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 327 #endif 328 #endif 329 #if defined (PETSC_USE_CTABLE) 330 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 331 col = (col - 1)/bs; 332 #else 333 col = (baij->colmap[in[j]] - 1)/bs; 334 #endif 335 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 336 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 337 col = in[j]; 338 } 339 } 340 else col = in[j]; 341 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 342 } 343 } 344 } else { 345 if (!baij->donotstash) { 346 if (roworiented) { 347 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 348 } else { 349 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 350 } 351 } 352 } 353 } 354 PetscFunctionReturn(0); 355 } 356 357 #undef __FUNCT__ 358 #define __FUNCT__ "MatGetValues_MPISBAIJ" 359 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 360 { 361 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 362 PetscErrorCode ierr; 363 PetscInt bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend; 364 PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data; 365 366 PetscFunctionBegin; 367 for (i=0; i<m; i++) { 368 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */ 369 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 370 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 371 row = idxm[i] - bsrstart; 372 for (j=0; j<n; j++) { 373 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */ 374 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 375 if (idxn[j] >= bscstart && idxn[j] < bscend){ 376 col = idxn[j] - bscstart; 377 ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 378 } else { 379 if (!baij->colmap) { 380 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 381 } 382 #if defined (PETSC_USE_CTABLE) 383 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 384 data --; 385 #else 386 data = baij->colmap[idxn[j]/bs]-1; 387 #endif 388 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 389 else { 390 col = data + idxn[j]%bs; 391 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 392 } 393 } 394 } 395 } else { 396 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 397 } 398 } 399 PetscFunctionReturn(0); 400 } 401 402 #undef __FUNCT__ 403 #define __FUNCT__ "MatNorm_MPISBAIJ" 404 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm) 405 { 406 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 407 PetscErrorCode ierr; 408 PetscReal sum[2],*lnorm2; 409 410 PetscFunctionBegin; 411 if (baij->size == 1) { 412 ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr); 413 } else { 414 if (type == NORM_FROBENIUS) { 415 ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr); 416 ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr); 417 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++; /* squar power of norm(A) */ 418 ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr); 419 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--; /* squar power of norm(B) */ 420 ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 421 *norm = sqrt(sum[0] + 2*sum[1]); 422 ierr = PetscFree(lnorm2);CHKERRQ(ierr); 423 } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */ 424 Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data; 425 Mat_SeqBAIJ *bmat=(Mat_SeqBAIJ*)baij->B->data; 426 PetscReal *rsum,*rsum2,vabs; 427 PetscInt *jj,*garray=baij->garray,rstart=baij->rstartbs,nz; 428 PetscInt brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs; 429 MatScalar *v; 430 431 ierr = PetscMalloc2(mat->cmap->N,PetscReal,&rsum,mat->cmap->N,PetscReal,&rsum2);CHKERRQ(ierr); 432 ierr = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 433 /* Amat */ 434 v = amat->a; jj = amat->j; 435 for (brow=0; brow<mbs; brow++) { 436 grow = bs*(rstart + brow); 437 nz = amat->i[brow+1] - amat->i[brow]; 438 for (bcol=0; bcol<nz; bcol++){ 439 gcol = bs*(rstart + *jj); jj++; 440 for (col=0; col<bs; col++){ 441 for (row=0; row<bs; row++){ 442 vabs = PetscAbsScalar(*v); v++; 443 rsum[gcol+col] += vabs; 444 /* non-diagonal block */ 445 if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs; 446 } 447 } 448 } 449 } 450 /* Bmat */ 451 v = bmat->a; jj = bmat->j; 452 for (brow=0; brow<mbs; brow++) { 453 grow = bs*(rstart + brow); 454 nz = bmat->i[brow+1] - bmat->i[brow]; 455 for (bcol=0; bcol<nz; bcol++){ 456 gcol = bs*garray[*jj]; jj++; 457 for (col=0; col<bs; col++){ 458 for (row=0; row<bs; row++){ 459 vabs = PetscAbsScalar(*v); v++; 460 rsum[gcol+col] += vabs; 461 rsum[grow+row] += vabs; 462 } 463 } 464 } 465 } 466 ierr = MPI_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 467 *norm = 0.0; 468 for (col=0; col<mat->cmap->N; col++) { 469 if (rsum2[col] > *norm) *norm = rsum2[col]; 470 } 471 ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr); 472 } else { 473 SETERRQ(PETSC_ERR_SUP,"No support for this norm yet"); 474 } 475 } 476 PetscFunctionReturn(0); 477 } 478 479 #undef __FUNCT__ 480 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ" 481 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode) 482 { 483 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 484 PetscErrorCode ierr; 485 PetscInt nstash,reallocs; 486 InsertMode addv; 487 488 PetscFunctionBegin; 489 if (baij->donotstash) { 490 PetscFunctionReturn(0); 491 } 492 493 /* make sure all processors are either in INSERTMODE or ADDMODE */ 494 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr); 495 if (addv == (ADD_VALUES|INSERT_VALUES)) { 496 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 497 } 498 mat->insertmode = addv; /* in case this processor had no cache */ 499 500 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 501 ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr); 502 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 503 ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 504 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 505 ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 506 PetscFunctionReturn(0); 507 } 508 509 #undef __FUNCT__ 510 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ" 511 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode) 512 { 513 Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data; 514 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)baij->A->data; 515 PetscErrorCode ierr; 516 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 517 PetscInt *row,*col; 518 PetscTruth other_disassembled; 519 PetscMPIInt n; 520 PetscTruth r1,r2,r3; 521 MatScalar *val; 522 InsertMode addv = mat->insertmode; 523 524 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 525 PetscFunctionBegin; 526 527 if (!baij->donotstash) { 528 while (1) { 529 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 530 if (!flg) break; 531 532 for (i=0; i<n;) { 533 /* Now identify the consecutive vals belonging to the same row */ 534 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 535 if (j < n) ncols = j-i; 536 else ncols = n-i; 537 /* Now assemble all these values with a single function call */ 538 ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 539 i = j; 540 } 541 } 542 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 543 /* Now process the block-stash. Since the values are stashed column-oriented, 544 set the roworiented flag to column oriented, and after MatSetValues() 545 restore the original flags */ 546 r1 = baij->roworiented; 547 r2 = a->roworiented; 548 r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented; 549 baij->roworiented = PETSC_FALSE; 550 a->roworiented = PETSC_FALSE; 551 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworinted */ 552 while (1) { 553 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 554 if (!flg) break; 555 556 for (i=0; i<n;) { 557 /* Now identify the consecutive vals belonging to the same row */ 558 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 559 if (j < n) ncols = j-i; 560 else ncols = n-i; 561 ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 562 i = j; 563 } 564 } 565 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 566 baij->roworiented = r1; 567 a->roworiented = r2; 568 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworinted */ 569 } 570 571 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 572 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 573 574 /* determine if any processor has disassembled, if so we must 575 also disassemble ourselfs, in order that we may reassemble. */ 576 /* 577 if nonzero structure of submatrix B cannot change then we know that 578 no processor disassembled thus we can skip this stuff 579 */ 580 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 581 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr); 582 if (mat->was_assembled && !other_disassembled) { 583 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 584 } 585 } 586 587 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 588 ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */ 589 } 590 ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */ 591 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 592 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 593 594 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 595 baij->rowvalues = 0; 596 597 PetscFunctionReturn(0); 598 } 599 600 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 601 #undef __FUNCT__ 602 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket" 603 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 604 { 605 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 606 PetscErrorCode ierr; 607 PetscInt bs = mat->rmap->bs; 608 PetscMPIInt size = baij->size,rank = baij->rank; 609 PetscTruth iascii,isdraw; 610 PetscViewer sviewer; 611 PetscViewerFormat format; 612 613 PetscFunctionBegin; 614 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 615 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 616 if (iascii) { 617 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 618 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 619 MatInfo info; 620 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 621 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 622 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n", 623 rank,mat->rmap->N,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs, 624 mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr); 625 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 626 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr); 627 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 628 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr); 629 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 630 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 631 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 632 PetscFunctionReturn(0); 633 } else if (format == PETSC_VIEWER_ASCII_INFO) { 634 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 635 PetscFunctionReturn(0); 636 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 637 PetscFunctionReturn(0); 638 } 639 } 640 641 if (isdraw) { 642 PetscDraw draw; 643 PetscTruth isnull; 644 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 645 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 646 } 647 648 if (size == 1) { 649 ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 650 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 651 } else { 652 /* assemble the entire matrix onto first processor. */ 653 Mat A; 654 Mat_SeqSBAIJ *Aloc; 655 Mat_SeqBAIJ *Bloc; 656 PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 657 MatScalar *a; 658 659 /* Should this be the same type as mat? */ 660 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 661 if (!rank) { 662 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 663 } else { 664 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 665 } 666 ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 667 ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 668 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 669 670 /* copy over the A part */ 671 Aloc = (Mat_SeqSBAIJ*)baij->A->data; 672 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 673 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 674 675 for (i=0; i<mbs; i++) { 676 rvals[0] = bs*(baij->rstartbs + i); 677 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 678 for (j=ai[i]; j<ai[i+1]; j++) { 679 col = (baij->cstartbs+aj[j])*bs; 680 for (k=0; k<bs; k++) { 681 ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 682 col++; a += bs; 683 } 684 } 685 } 686 /* copy over the B part */ 687 Bloc = (Mat_SeqBAIJ*)baij->B->data; 688 ai = Bloc->i; aj = Bloc->j; a = Bloc->a; 689 for (i=0; i<mbs; i++) { 690 691 rvals[0] = bs*(baij->rstartbs + i); 692 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 693 for (j=ai[i]; j<ai[i+1]; j++) { 694 col = baij->garray[aj[j]]*bs; 695 for (k=0; k<bs; k++) { 696 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 697 col++; a += bs; 698 } 699 } 700 } 701 ierr = PetscFree(rvals);CHKERRQ(ierr); 702 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 703 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 704 /* 705 Everyone has to call to draw the matrix since the graphics waits are 706 synchronized across all processors that share the PetscDraw object 707 */ 708 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 709 if (!rank) { 710 ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 711 ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 712 } 713 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 714 ierr = MatDestroy(A);CHKERRQ(ierr); 715 } 716 PetscFunctionReturn(0); 717 } 718 719 #undef __FUNCT__ 720 #define __FUNCT__ "MatView_MPISBAIJ" 721 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer) 722 { 723 PetscErrorCode ierr; 724 PetscTruth iascii,isdraw,issocket,isbinary; 725 726 PetscFunctionBegin; 727 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 728 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 729 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 730 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 731 if (iascii || isdraw || issocket || isbinary) { 732 ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 733 } else { 734 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name); 735 } 736 PetscFunctionReturn(0); 737 } 738 739 #undef __FUNCT__ 740 #define __FUNCT__ "MatDestroy_MPISBAIJ" 741 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat) 742 { 743 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 744 PetscErrorCode ierr; 745 746 PetscFunctionBegin; 747 #if defined(PETSC_USE_LOG) 748 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N); 749 #endif 750 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 751 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 752 ierr = MatDestroy(baij->A);CHKERRQ(ierr); 753 ierr = MatDestroy(baij->B);CHKERRQ(ierr); 754 #if defined (PETSC_USE_CTABLE) 755 if (baij->colmap) {ierr = PetscTableDestroy(baij->colmap);CHKERRQ(ierr);} 756 #else 757 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 758 #endif 759 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 760 if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 761 if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 762 if (baij->slvec0) { 763 ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr); 764 ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr); 765 } 766 if (baij->slvec1) { 767 ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr); 768 ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr); 769 ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr); 770 } 771 if (baij->sMvctx) {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);} 772 ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr); 773 ierr = PetscFree(baij->barray);CHKERRQ(ierr); 774 ierr = PetscFree(baij->hd);CHKERRQ(ierr); 775 if (baij->diag) {ierr = VecDestroy(baij->diag);CHKERRQ(ierr);} 776 if (baij->bb1) {ierr = VecDestroy(baij->bb1);CHKERRQ(ierr);} 777 if (baij->xx1) {ierr = VecDestroy(baij->xx1);CHKERRQ(ierr);} 778 #if defined(PETSC_USE_SCALAR_MAT_SINGLE) 779 ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr); 780 #endif 781 ierr = PetscFree(baij->in_loc);CHKERRQ(ierr); 782 ierr = PetscFree(baij->v_loc);CHKERRQ(ierr); 783 ierr = PetscFree(baij->rangebs);CHKERRQ(ierr); 784 ierr = PetscFree(baij);CHKERRQ(ierr); 785 786 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 787 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 788 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 789 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 790 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 791 PetscFunctionReturn(0); 792 } 793 794 #undef __FUNCT__ 795 #define __FUNCT__ "MatMult_MPISBAIJ_Hermitian" 796 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy) 797 { 798 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 799 PetscErrorCode ierr; 800 PetscInt nt,mbs=a->mbs,bs=A->rmap->bs; 801 PetscScalar *x,*from; 802 803 PetscFunctionBegin; 804 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 805 if (nt != A->cmap->n) { 806 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 807 } 808 809 /* diagonal part */ 810 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 811 ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr); 812 813 /* subdiagonal part */ 814 ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 815 816 /* copy x into the vec slvec0 */ 817 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 818 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 819 820 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 821 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 822 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 823 824 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 825 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 826 /* supperdiagonal part */ 827 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 828 PetscFunctionReturn(0); 829 } 830 831 #undef __FUNCT__ 832 #define __FUNCT__ "MatMult_MPISBAIJ" 833 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy) 834 { 835 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 836 PetscErrorCode ierr; 837 PetscInt nt,mbs=a->mbs,bs=A->rmap->bs; 838 PetscScalar *x,*from; 839 840 PetscFunctionBegin; 841 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 842 if (nt != A->cmap->n) { 843 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 844 } 845 846 /* diagonal part */ 847 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 848 ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr); 849 850 /* subdiagonal part */ 851 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 852 853 /* copy x into the vec slvec0 */ 854 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 855 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 856 857 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 858 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 859 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 860 861 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 862 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 863 /* supperdiagonal part */ 864 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 865 PetscFunctionReturn(0); 866 } 867 868 #undef __FUNCT__ 869 #define __FUNCT__ "MatMult_MPISBAIJ_2comm" 870 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy) 871 { 872 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 873 PetscErrorCode ierr; 874 PetscInt nt; 875 876 PetscFunctionBegin; 877 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 878 if (nt != A->cmap->n) { 879 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 880 } 881 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 882 if (nt != A->rmap->N) { 883 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 884 } 885 886 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 887 /* do diagonal part */ 888 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 889 /* do supperdiagonal part */ 890 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 891 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 892 /* do subdiagonal part */ 893 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 894 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 895 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 896 897 PetscFunctionReturn(0); 898 } 899 900 #undef __FUNCT__ 901 #define __FUNCT__ "MatMultAdd_MPISBAIJ" 902 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 903 { 904 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 905 PetscErrorCode ierr; 906 PetscInt mbs=a->mbs,bs=A->rmap->bs; 907 PetscScalar *x,*from,zero=0.0; 908 909 PetscFunctionBegin; 910 /* 911 PetscSynchronizedPrintf(((PetscObject)A)->comm," MatMultAdd is called ...\n"); 912 PetscSynchronizedFlush(((PetscObject)A)->comm); 913 */ 914 /* diagonal part */ 915 ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr); 916 ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr); 917 918 /* subdiagonal part */ 919 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 920 921 /* copy x into the vec slvec0 */ 922 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 923 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 924 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 925 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 926 927 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 928 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 929 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 930 931 /* supperdiagonal part */ 932 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr); 933 934 PetscFunctionReturn(0); 935 } 936 937 #undef __FUNCT__ 938 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm" 939 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz) 940 { 941 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 942 PetscErrorCode ierr; 943 944 PetscFunctionBegin; 945 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 946 /* do diagonal part */ 947 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 948 /* do supperdiagonal part */ 949 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 950 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 951 952 /* do subdiagonal part */ 953 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 954 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 955 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 956 957 PetscFunctionReturn(0); 958 } 959 960 /* 961 This only works correctly for square matrices where the subblock A->A is the 962 diagonal block 963 */ 964 #undef __FUNCT__ 965 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ" 966 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v) 967 { 968 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 969 PetscErrorCode ierr; 970 971 PetscFunctionBegin; 972 /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */ 973 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 974 PetscFunctionReturn(0); 975 } 976 977 #undef __FUNCT__ 978 #define __FUNCT__ "MatScale_MPISBAIJ" 979 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa) 980 { 981 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 982 PetscErrorCode ierr; 983 984 PetscFunctionBegin; 985 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 986 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 987 PetscFunctionReturn(0); 988 } 989 990 #undef __FUNCT__ 991 #define __FUNCT__ "MatGetRow_MPISBAIJ" 992 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 993 { 994 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 995 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 996 PetscErrorCode ierr; 997 PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 998 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend; 999 PetscInt *cmap,*idx_p,cstart = mat->rstartbs; 1000 1001 PetscFunctionBegin; 1002 if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1003 mat->getrowactive = PETSC_TRUE; 1004 1005 if (!mat->rowvalues && (idx || v)) { 1006 /* 1007 allocate enough space to hold information from the longest row. 1008 */ 1009 Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data; 1010 Mat_SeqBAIJ *Ba = (Mat_SeqBAIJ*)mat->B->data; 1011 PetscInt max = 1,mbs = mat->mbs,tmp; 1012 for (i=0; i<mbs; i++) { 1013 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */ 1014 if (max < tmp) { max = tmp; } 1015 } 1016 ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1017 } 1018 1019 if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows") 1020 lrow = row - brstart; /* local row index */ 1021 1022 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1023 if (!v) {pvA = 0; pvB = 0;} 1024 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1025 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1026 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1027 nztot = nzA + nzB; 1028 1029 cmap = mat->garray; 1030 if (v || idx) { 1031 if (nztot) { 1032 /* Sort by increasing column numbers, assuming A and B already sorted */ 1033 PetscInt imark = -1; 1034 if (v) { 1035 *v = v_p = mat->rowvalues; 1036 for (i=0; i<nzB; i++) { 1037 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1038 else break; 1039 } 1040 imark = i; 1041 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1042 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1043 } 1044 if (idx) { 1045 *idx = idx_p = mat->rowindices; 1046 if (imark > -1) { 1047 for (i=0; i<imark; i++) { 1048 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1049 } 1050 } else { 1051 for (i=0; i<nzB; i++) { 1052 if (cmap[cworkB[i]/bs] < cstart) 1053 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1054 else break; 1055 } 1056 imark = i; 1057 } 1058 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1059 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1060 } 1061 } else { 1062 if (idx) *idx = 0; 1063 if (v) *v = 0; 1064 } 1065 } 1066 *nz = nztot; 1067 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1068 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1069 PetscFunctionReturn(0); 1070 } 1071 1072 #undef __FUNCT__ 1073 #define __FUNCT__ "MatRestoreRow_MPISBAIJ" 1074 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1075 { 1076 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1077 1078 PetscFunctionBegin; 1079 if (!baij->getrowactive) { 1080 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1081 } 1082 baij->getrowactive = PETSC_FALSE; 1083 PetscFunctionReturn(0); 1084 } 1085 1086 #undef __FUNCT__ 1087 #define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ" 1088 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A) 1089 { 1090 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1091 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1092 1093 PetscFunctionBegin; 1094 aA->getrow_utriangular = PETSC_TRUE; 1095 PetscFunctionReturn(0); 1096 } 1097 #undef __FUNCT__ 1098 #define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ" 1099 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A) 1100 { 1101 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1102 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1103 1104 PetscFunctionBegin; 1105 aA->getrow_utriangular = PETSC_FALSE; 1106 PetscFunctionReturn(0); 1107 } 1108 1109 #undef __FUNCT__ 1110 #define __FUNCT__ "MatRealPart_MPISBAIJ" 1111 PetscErrorCode MatRealPart_MPISBAIJ(Mat A) 1112 { 1113 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1114 PetscErrorCode ierr; 1115 1116 PetscFunctionBegin; 1117 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1118 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1119 PetscFunctionReturn(0); 1120 } 1121 1122 #undef __FUNCT__ 1123 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ" 1124 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A) 1125 { 1126 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1127 PetscErrorCode ierr; 1128 1129 PetscFunctionBegin; 1130 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1131 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1132 PetscFunctionReturn(0); 1133 } 1134 1135 #undef __FUNCT__ 1136 #define __FUNCT__ "MatZeroEntries_MPISBAIJ" 1137 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A) 1138 { 1139 Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data; 1140 PetscErrorCode ierr; 1141 1142 PetscFunctionBegin; 1143 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1144 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1145 PetscFunctionReturn(0); 1146 } 1147 1148 #undef __FUNCT__ 1149 #define __FUNCT__ "MatGetInfo_MPISBAIJ" 1150 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1151 { 1152 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data; 1153 Mat A = a->A,B = a->B; 1154 PetscErrorCode ierr; 1155 PetscReal isend[5],irecv[5]; 1156 1157 PetscFunctionBegin; 1158 info->block_size = (PetscReal)matin->rmap->bs; 1159 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1160 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1161 isend[3] = info->memory; isend[4] = info->mallocs; 1162 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1163 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1164 isend[3] += info->memory; isend[4] += info->mallocs; 1165 if (flag == MAT_LOCAL) { 1166 info->nz_used = isend[0]; 1167 info->nz_allocated = isend[1]; 1168 info->nz_unneeded = isend[2]; 1169 info->memory = isend[3]; 1170 info->mallocs = isend[4]; 1171 } else if (flag == MAT_GLOBAL_MAX) { 1172 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1173 info->nz_used = irecv[0]; 1174 info->nz_allocated = irecv[1]; 1175 info->nz_unneeded = irecv[2]; 1176 info->memory = irecv[3]; 1177 info->mallocs = irecv[4]; 1178 } else if (flag == MAT_GLOBAL_SUM) { 1179 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1180 info->nz_used = irecv[0]; 1181 info->nz_allocated = irecv[1]; 1182 info->nz_unneeded = irecv[2]; 1183 info->memory = irecv[3]; 1184 info->mallocs = irecv[4]; 1185 } else { 1186 SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1187 } 1188 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1189 info->fill_ratio_needed = 0; 1190 info->factor_mallocs = 0; 1191 PetscFunctionReturn(0); 1192 } 1193 1194 #undef __FUNCT__ 1195 #define __FUNCT__ "MatSetOption_MPISBAIJ" 1196 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscTruth flg) 1197 { 1198 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1199 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1200 PetscErrorCode ierr; 1201 1202 PetscFunctionBegin; 1203 switch (op) { 1204 case MAT_NEW_NONZERO_LOCATIONS: 1205 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1206 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1207 case MAT_KEEP_NONZERO_PATTERN: 1208 case MAT_NEW_NONZERO_LOCATION_ERR: 1209 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1210 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1211 break; 1212 case MAT_ROW_ORIENTED: 1213 a->roworiented = flg; 1214 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1215 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1216 break; 1217 case MAT_NEW_DIAGONALS: 1218 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1219 break; 1220 case MAT_IGNORE_OFF_PROC_ENTRIES: 1221 a->donotstash = flg; 1222 break; 1223 case MAT_USE_HASH_TABLE: 1224 a->ht_flag = flg; 1225 break; 1226 case MAT_HERMITIAN: 1227 if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first"); 1228 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1229 A->ops->mult = MatMult_MPISBAIJ_Hermitian; 1230 break; 1231 case MAT_SYMMETRIC: 1232 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1233 break; 1234 case MAT_STRUCTURALLY_SYMMETRIC: 1235 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1236 break; 1237 case MAT_SYMMETRY_ETERNAL: 1238 if (!flg) SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric"); 1239 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1240 break; 1241 case MAT_IGNORE_LOWER_TRIANGULAR: 1242 aA->ignore_ltriangular = flg; 1243 break; 1244 case MAT_ERROR_LOWER_TRIANGULAR: 1245 aA->ignore_ltriangular = flg; 1246 break; 1247 case MAT_GETROW_UPPERTRIANGULAR: 1248 aA->getrow_utriangular = flg; 1249 break; 1250 default: 1251 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 1252 } 1253 PetscFunctionReturn(0); 1254 } 1255 1256 #undef __FUNCT__ 1257 #define __FUNCT__ "MatTranspose_MPISBAIJ" 1258 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B) 1259 { 1260 PetscErrorCode ierr; 1261 PetscFunctionBegin; 1262 if (MAT_INITIAL_MATRIX || *B != A) { 1263 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 1264 } 1265 PetscFunctionReturn(0); 1266 } 1267 1268 #undef __FUNCT__ 1269 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ" 1270 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr) 1271 { 1272 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1273 Mat a=baij->A, b=baij->B; 1274 PetscErrorCode ierr; 1275 PetscInt nv,m,n; 1276 PetscTruth flg; 1277 1278 PetscFunctionBegin; 1279 if (ll != rr){ 1280 ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr); 1281 if (!flg) 1282 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n"); 1283 } 1284 if (!ll) PetscFunctionReturn(0); 1285 1286 ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr); 1287 if (m != n) SETERRQ2(PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n); 1288 1289 ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr); 1290 if (nv!=n) SETERRQ(PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size"); 1291 1292 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1293 1294 /* left diagonalscale the off-diagonal part */ 1295 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1296 1297 /* scale the diagonal part */ 1298 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1299 1300 /* right diagonalscale the off-diagonal part */ 1301 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1302 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1303 PetscFunctionReturn(0); 1304 } 1305 1306 #undef __FUNCT__ 1307 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ" 1308 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A) 1309 { 1310 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1311 PetscErrorCode ierr; 1312 1313 PetscFunctionBegin; 1314 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1315 PetscFunctionReturn(0); 1316 } 1317 1318 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *); 1319 1320 #undef __FUNCT__ 1321 #define __FUNCT__ "MatEqual_MPISBAIJ" 1322 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag) 1323 { 1324 Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data; 1325 Mat a,b,c,d; 1326 PetscTruth flg; 1327 PetscErrorCode ierr; 1328 1329 PetscFunctionBegin; 1330 a = matA->A; b = matA->B; 1331 c = matB->A; d = matB->B; 1332 1333 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1334 if (flg) { 1335 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1336 } 1337 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1338 PetscFunctionReturn(0); 1339 } 1340 1341 #undef __FUNCT__ 1342 #define __FUNCT__ "MatCopy_MPISBAIJ" 1343 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str) 1344 { 1345 PetscErrorCode ierr; 1346 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1347 Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data; 1348 1349 PetscFunctionBegin; 1350 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1351 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1352 ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr); 1353 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1354 ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr); 1355 } else { 1356 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1357 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1358 } 1359 PetscFunctionReturn(0); 1360 } 1361 1362 #undef __FUNCT__ 1363 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ" 1364 PetscErrorCode MatSetUpPreallocation_MPISBAIJ(Mat A) 1365 { 1366 PetscErrorCode ierr; 1367 1368 PetscFunctionBegin; 1369 ierr = MatMPISBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1370 PetscFunctionReturn(0); 1371 } 1372 1373 #include "petscblaslapack.h" 1374 #undef __FUNCT__ 1375 #define __FUNCT__ "MatAXPY_MPISBAIJ" 1376 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1377 { 1378 PetscErrorCode ierr; 1379 Mat_MPISBAIJ *xx=(Mat_MPISBAIJ *)X->data,*yy=(Mat_MPISBAIJ *)Y->data; 1380 PetscBLASInt bnz,one=1; 1381 Mat_SeqSBAIJ *xa,*ya; 1382 Mat_SeqBAIJ *xb,*yb; 1383 1384 PetscFunctionBegin; 1385 if (str == SAME_NONZERO_PATTERN) { 1386 PetscScalar alpha = a; 1387 xa = (Mat_SeqSBAIJ *)xx->A->data; 1388 ya = (Mat_SeqSBAIJ *)yy->A->data; 1389 bnz = PetscBLASIntCast(xa->nz); 1390 BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one); 1391 xb = (Mat_SeqBAIJ *)xx->B->data; 1392 yb = (Mat_SeqBAIJ *)yy->B->data; 1393 bnz = PetscBLASIntCast(xb->nz); 1394 BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one); 1395 } else { 1396 ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr); 1397 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1398 ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr); 1399 } 1400 PetscFunctionReturn(0); 1401 } 1402 1403 #undef __FUNCT__ 1404 #define __FUNCT__ "MatSetBlockSize_MPISBAIJ" 1405 PetscErrorCode MatSetBlockSize_MPISBAIJ(Mat A,PetscInt bs) 1406 { 1407 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1408 PetscInt rbs,cbs; 1409 PetscErrorCode ierr; 1410 1411 PetscFunctionBegin; 1412 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1413 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1414 ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr); 1415 ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr); 1416 if (rbs != bs) SETERRQ2(PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with SBAIJ %d",bs,rbs); 1417 if (cbs != bs) SETERRQ2(PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with SBAIJ %d",bs,cbs); 1418 PetscFunctionReturn(0); 1419 } 1420 1421 #undef __FUNCT__ 1422 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ" 1423 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[]) 1424 { 1425 PetscErrorCode ierr; 1426 PetscInt i; 1427 PetscTruth flg; 1428 1429 PetscFunctionBegin; 1430 for (i=0; i<n; i++) { 1431 ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr); 1432 if (!flg) { 1433 SETERRQ(PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices"); 1434 } 1435 } 1436 ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr); 1437 PetscFunctionReturn(0); 1438 } 1439 1440 1441 /* -------------------------------------------------------------------*/ 1442 static struct _MatOps MatOps_Values = { 1443 MatSetValues_MPISBAIJ, 1444 MatGetRow_MPISBAIJ, 1445 MatRestoreRow_MPISBAIJ, 1446 MatMult_MPISBAIJ, 1447 /* 4*/ MatMultAdd_MPISBAIJ, 1448 MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */ 1449 MatMultAdd_MPISBAIJ, 1450 0, 1451 0, 1452 0, 1453 /*10*/ 0, 1454 0, 1455 0, 1456 MatSOR_MPISBAIJ, 1457 MatTranspose_MPISBAIJ, 1458 /*15*/ MatGetInfo_MPISBAIJ, 1459 MatEqual_MPISBAIJ, 1460 MatGetDiagonal_MPISBAIJ, 1461 MatDiagonalScale_MPISBAIJ, 1462 MatNorm_MPISBAIJ, 1463 /*20*/ MatAssemblyBegin_MPISBAIJ, 1464 MatAssemblyEnd_MPISBAIJ, 1465 MatSetOption_MPISBAIJ, 1466 MatZeroEntries_MPISBAIJ, 1467 /*24*/ 0, 1468 0, 1469 0, 1470 0, 1471 0, 1472 /*29*/ MatSetUpPreallocation_MPISBAIJ, 1473 0, 1474 0, 1475 0, 1476 0, 1477 /*34*/ MatDuplicate_MPISBAIJ, 1478 0, 1479 0, 1480 0, 1481 0, 1482 /*39*/ MatAXPY_MPISBAIJ, 1483 MatGetSubMatrices_MPISBAIJ, 1484 MatIncreaseOverlap_MPISBAIJ, 1485 MatGetValues_MPISBAIJ, 1486 MatCopy_MPISBAIJ, 1487 /*44*/ 0, 1488 MatScale_MPISBAIJ, 1489 0, 1490 0, 1491 0, 1492 /*49*/ MatSetBlockSize_MPISBAIJ, 1493 0, 1494 0, 1495 0, 1496 0, 1497 /*54*/ 0, 1498 0, 1499 MatSetUnfactored_MPISBAIJ, 1500 0, 1501 MatSetValuesBlocked_MPISBAIJ, 1502 /*59*/ 0, 1503 0, 1504 0, 1505 0, 1506 0, 1507 /*64*/ 0, 1508 0, 1509 0, 1510 0, 1511 0, 1512 /*69*/ MatGetRowMaxAbs_MPISBAIJ, 1513 0, 1514 0, 1515 0, 1516 0, 1517 /*74*/ 0, 1518 0, 1519 0, 1520 0, 1521 0, 1522 /*79*/ 0, 1523 0, 1524 0, 1525 0, 1526 MatLoad_MPISBAIJ, 1527 /*84*/ 0, 1528 0, 1529 0, 1530 0, 1531 0, 1532 /*89*/ 0, 1533 0, 1534 0, 1535 0, 1536 0, 1537 /*94*/ 0, 1538 0, 1539 0, 1540 0, 1541 0, 1542 /*99*/ 0, 1543 0, 1544 0, 1545 0, 1546 0, 1547 /*104*/0, 1548 MatRealPart_MPISBAIJ, 1549 MatImaginaryPart_MPISBAIJ, 1550 MatGetRowUpperTriangular_MPISBAIJ, 1551 MatRestoreRowUpperTriangular_MPISBAIJ 1552 }; 1553 1554 1555 EXTERN_C_BEGIN 1556 #undef __FUNCT__ 1557 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ" 1558 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 1559 { 1560 PetscFunctionBegin; 1561 *a = ((Mat_MPISBAIJ *)A->data)->A; 1562 *iscopy = PETSC_FALSE; 1563 PetscFunctionReturn(0); 1564 } 1565 EXTERN_C_END 1566 1567 EXTERN_C_BEGIN 1568 #undef __FUNCT__ 1569 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ" 1570 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz) 1571 { 1572 Mat_MPISBAIJ *b; 1573 PetscErrorCode ierr; 1574 PetscInt i,mbs,Mbs,newbs = PetscAbs(bs); 1575 1576 PetscFunctionBegin; 1577 if (bs < 0){ 1578 ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPISBAIJ matrix","Mat");CHKERRQ(ierr); 1579 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr); 1580 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1581 bs = PetscAbs(bs); 1582 } 1583 if ((d_nnz || o_nnz) && newbs != bs) { 1584 SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz"); 1585 } 1586 bs = newbs; 1587 1588 if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3; 1589 if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1; 1590 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 1591 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 1592 1593 B->rmap->bs = B->cmap->bs = bs; 1594 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 1595 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 1596 1597 if (d_nnz) { 1598 for (i=0; i<B->rmap->n/bs; i++) { 1599 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 1600 } 1601 } 1602 if (o_nnz) { 1603 for (i=0; i<B->rmap->n/bs; i++) { 1604 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 1605 } 1606 } 1607 1608 b = (Mat_MPISBAIJ*)B->data; 1609 mbs = B->rmap->n/bs; 1610 Mbs = B->rmap->N/bs; 1611 if (mbs*bs != B->rmap->n) { 1612 SETERRQ2(PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->N,bs); 1613 } 1614 1615 B->rmap->bs = bs; 1616 b->bs2 = bs*bs; 1617 b->mbs = mbs; 1618 b->nbs = mbs; 1619 b->Mbs = Mbs; 1620 b->Nbs = Mbs; 1621 1622 for (i=0; i<=b->size; i++) { 1623 b->rangebs[i] = B->rmap->range[i]/bs; 1624 } 1625 b->rstartbs = B->rmap->rstart/bs; 1626 b->rendbs = B->rmap->rend/bs; 1627 1628 b->cstartbs = B->cmap->rstart/bs; 1629 b->cendbs = B->cmap->rend/bs; 1630 1631 if (!B->preallocated) { 1632 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 1633 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 1634 ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr); 1635 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 1636 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 1637 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 1638 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 1639 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 1640 /* build cache for off array entries formed */ 1641 ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr); 1642 } 1643 1644 ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 1645 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 1646 B->preallocated = PETSC_TRUE; 1647 PetscFunctionReturn(0); 1648 } 1649 EXTERN_C_END 1650 1651 EXTERN_C_BEGIN 1652 #if defined(PETSC_HAVE_MUMPS) 1653 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_mpisbaij_mumps(Mat,MatFactorType,Mat*); 1654 #endif 1655 #if defined(PETSC_HAVE_SPOOLES) 1656 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_mpisbaij_spooles(Mat,MatFactorType,Mat*); 1657 #endif 1658 #if defined(PETSC_HAVE_PASTIX) 1659 extern PetscErrorCode MatGetFactor_mpisbaij_pastix(Mat,MatFactorType,Mat*); 1660 #endif 1661 EXTERN_C_END 1662 1663 /*MC 1664 MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices, 1665 based on block compressed sparse row format. Only the upper triangular portion of the matrix is stored. 1666 1667 Options Database Keys: 1668 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions() 1669 1670 Level: beginner 1671 1672 .seealso: MatCreateMPISBAIJ 1673 M*/ 1674 1675 EXTERN_C_BEGIN 1676 #undef __FUNCT__ 1677 #define __FUNCT__ "MatCreate_MPISBAIJ" 1678 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPISBAIJ(Mat B) 1679 { 1680 Mat_MPISBAIJ *b; 1681 PetscErrorCode ierr; 1682 PetscTruth flg; 1683 1684 PetscFunctionBegin; 1685 1686 ierr = PetscNewLog(B,Mat_MPISBAIJ,&b);CHKERRQ(ierr); 1687 B->data = (void*)b; 1688 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1689 1690 B->ops->destroy = MatDestroy_MPISBAIJ; 1691 B->ops->view = MatView_MPISBAIJ; 1692 B->mapping = 0; 1693 B->assembled = PETSC_FALSE; 1694 1695 B->insertmode = NOT_SET_VALUES; 1696 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 1697 ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr); 1698 1699 /* build local table of row and column ownerships */ 1700 ierr = PetscMalloc((b->size+2)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr); 1701 1702 /* build cache for off array entries formed */ 1703 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 1704 b->donotstash = PETSC_FALSE; 1705 b->colmap = PETSC_NULL; 1706 b->garray = PETSC_NULL; 1707 b->roworiented = PETSC_TRUE; 1708 1709 /* stuff used in block assembly */ 1710 b->barray = 0; 1711 1712 /* stuff used for matrix vector multiply */ 1713 b->lvec = 0; 1714 b->Mvctx = 0; 1715 b->slvec0 = 0; 1716 b->slvec0b = 0; 1717 b->slvec1 = 0; 1718 b->slvec1a = 0; 1719 b->slvec1b = 0; 1720 b->sMvctx = 0; 1721 1722 /* stuff for MatGetRow() */ 1723 b->rowindices = 0; 1724 b->rowvalues = 0; 1725 b->getrowactive = PETSC_FALSE; 1726 1727 /* hash table stuff */ 1728 b->ht = 0; 1729 b->hd = 0; 1730 b->ht_size = 0; 1731 b->ht_flag = PETSC_FALSE; 1732 b->ht_fact = 0; 1733 b->ht_total_ct = 0; 1734 b->ht_insert_ct = 0; 1735 1736 b->in_loc = 0; 1737 b->v_loc = 0; 1738 b->n_loc = 0; 1739 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr); 1740 ierr = PetscOptionsTruth("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 1741 if (flg) { 1742 PetscReal fact = 1.39; 1743 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 1744 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr); 1745 if (fact <= 1.0) fact = 1.39; 1746 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 1747 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 1748 } 1749 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1750 1751 #if defined(PETSC_HAVE_PASTIX) 1752 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C", 1753 "MatGetFactor_mpisbaij_pastix", 1754 MatGetFactor_mpisbaij_pastix);CHKERRQ(ierr); 1755 #endif 1756 #if defined(PETSC_HAVE_MUMPS) 1757 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", 1758 "MatGetFactor_mpisbaij_mumps", 1759 MatGetFactor_mpisbaij_mumps);CHKERRQ(ierr); 1760 #endif 1761 #if defined(PETSC_HAVE_SPOOLES) 1762 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C", 1763 "MatGetFactor_mpisbaij_spooles", 1764 MatGetFactor_mpisbaij_spooles);CHKERRQ(ierr); 1765 #endif 1766 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1767 "MatStoreValues_MPISBAIJ", 1768 MatStoreValues_MPISBAIJ);CHKERRQ(ierr); 1769 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1770 "MatRetrieveValues_MPISBAIJ", 1771 MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr); 1772 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 1773 "MatGetDiagonalBlock_MPISBAIJ", 1774 MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr); 1775 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C", 1776 "MatMPISBAIJSetPreallocation_MPISBAIJ", 1777 MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr); 1778 B->symmetric = PETSC_TRUE; 1779 B->structurally_symmetric = PETSC_TRUE; 1780 B->symmetric_set = PETSC_TRUE; 1781 B->structurally_symmetric_set = PETSC_TRUE; 1782 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr); 1783 PetscFunctionReturn(0); 1784 } 1785 EXTERN_C_END 1786 1787 /*MC 1788 MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices. 1789 1790 This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator, 1791 and MATMPISBAIJ otherwise. 1792 1793 Options Database Keys: 1794 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions() 1795 1796 Level: beginner 1797 1798 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ 1799 M*/ 1800 1801 EXTERN_C_BEGIN 1802 #undef __FUNCT__ 1803 #define __FUNCT__ "MatCreate_SBAIJ" 1804 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SBAIJ(Mat A) 1805 { 1806 PetscErrorCode ierr; 1807 PetscMPIInt size; 1808 1809 PetscFunctionBegin; 1810 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 1811 if (size == 1) { 1812 ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr); 1813 } else { 1814 ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 1815 } 1816 PetscFunctionReturn(0); 1817 } 1818 EXTERN_C_END 1819 1820 #undef __FUNCT__ 1821 #define __FUNCT__ "MatMPISBAIJSetPreallocation" 1822 /*@C 1823 MatMPISBAIJSetPreallocation - For good matrix assembly performance 1824 the user should preallocate the matrix storage by setting the parameters 1825 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1826 performance can be increased by more than a factor of 50. 1827 1828 Collective on Mat 1829 1830 Input Parameters: 1831 + A - the matrix 1832 . bs - size of blockk 1833 . d_nz - number of block nonzeros per block row in diagonal portion of local 1834 submatrix (same for all local rows) 1835 . d_nnz - array containing the number of block nonzeros in the various block rows 1836 in the upper triangular and diagonal part of the in diagonal portion of the local 1837 (possibly different for each block row) or PETSC_NULL. You must leave room 1838 for the diagonal entry even if it is zero. 1839 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1840 submatrix (same for all local rows). 1841 - o_nnz - array containing the number of nonzeros in the various block rows of the 1842 off-diagonal portion of the local submatrix (possibly different for 1843 each block row) or PETSC_NULL. 1844 1845 1846 Options Database Keys: 1847 . -mat_no_unroll - uses code that does not unroll the loops in the 1848 block calculations (much slower) 1849 . -mat_block_size - size of the blocks to use 1850 1851 Notes: 1852 1853 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1854 than it must be used on all processors that share the object for that argument. 1855 1856 If the *_nnz parameter is given then the *_nz parameter is ignored 1857 1858 Storage Information: 1859 For a square global matrix we define each processor's diagonal portion 1860 to be its local rows and the corresponding columns (a square submatrix); 1861 each processor's off-diagonal portion encompasses the remainder of the 1862 local matrix (a rectangular submatrix). 1863 1864 The user can specify preallocated storage for the diagonal part of 1865 the local submatrix with either d_nz or d_nnz (not both). Set 1866 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1867 memory allocation. Likewise, specify preallocated storage for the 1868 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1869 1870 You can call MatGetInfo() to get information on how effective the preallocation was; 1871 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 1872 You can also run with the option -info and look for messages with the string 1873 malloc in them to see if additional memory allocation was needed. 1874 1875 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1876 the figure below we depict these three local rows and all columns (0-11). 1877 1878 .vb 1879 0 1 2 3 4 5 6 7 8 9 10 11 1880 ------------------- 1881 row 3 | o o o d d d o o o o o o 1882 row 4 | o o o d d d o o o o o o 1883 row 5 | o o o d d d o o o o o o 1884 ------------------- 1885 .ve 1886 1887 Thus, any entries in the d locations are stored in the d (diagonal) 1888 submatrix, and any entries in the o locations are stored in the 1889 o (off-diagonal) submatrix. Note that the d matrix is stored in 1890 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 1891 1892 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 1893 plus the diagonal part of the d matrix, 1894 and o_nz should indicate the number of block nonzeros per row in the o matrix. 1895 In general, for PDE problems in which most nonzeros are near the diagonal, 1896 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1897 or you will get TERRIBLE performance; see the users' manual chapter on 1898 matrices. 1899 1900 Level: intermediate 1901 1902 .keywords: matrix, block, aij, compressed row, sparse, parallel 1903 1904 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1905 @*/ 1906 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 1907 { 1908 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 1909 1910 PetscFunctionBegin; 1911 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 1912 if (f) { 1913 ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 1914 } 1915 PetscFunctionReturn(0); 1916 } 1917 1918 #undef __FUNCT__ 1919 #define __FUNCT__ "MatCreateMPISBAIJ" 1920 /*@C 1921 MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format 1922 (block compressed row). For good matrix assembly performance 1923 the user should preallocate the matrix storage by setting the parameters 1924 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1925 performance can be increased by more than a factor of 50. 1926 1927 Collective on MPI_Comm 1928 1929 Input Parameters: 1930 + comm - MPI communicator 1931 . bs - size of blockk 1932 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1933 This value should be the same as the local size used in creating the 1934 y vector for the matrix-vector product y = Ax. 1935 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1936 This value should be the same as the local size used in creating the 1937 x vector for the matrix-vector product y = Ax. 1938 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 1939 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 1940 . d_nz - number of block nonzeros per block row in diagonal portion of local 1941 submatrix (same for all local rows) 1942 . d_nnz - array containing the number of block nonzeros in the various block rows 1943 in the upper triangular portion of the in diagonal portion of the local 1944 (possibly different for each block block row) or PETSC_NULL. 1945 You must leave room for the diagonal entry even if it is zero. 1946 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1947 submatrix (same for all local rows). 1948 - o_nnz - array containing the number of nonzeros in the various block rows of the 1949 off-diagonal portion of the local submatrix (possibly different for 1950 each block row) or PETSC_NULL. 1951 1952 Output Parameter: 1953 . A - the matrix 1954 1955 Options Database Keys: 1956 . -mat_no_unroll - uses code that does not unroll the loops in the 1957 block calculations (much slower) 1958 . -mat_block_size - size of the blocks to use 1959 . -mat_mpi - use the parallel matrix data structures even on one processor 1960 (defaults to using SeqBAIJ format on one processor) 1961 1962 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 1963 MatXXXXSetPreallocation() paradgm instead of this routine directly. 1964 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 1965 1966 Notes: 1967 The number of rows and columns must be divisible by blocksize. 1968 This matrix type does not support complex Hermitian operation. 1969 1970 The user MUST specify either the local or global matrix dimensions 1971 (possibly both). 1972 1973 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1974 than it must be used on all processors that share the object for that argument. 1975 1976 If the *_nnz parameter is given then the *_nz parameter is ignored 1977 1978 Storage Information: 1979 For a square global matrix we define each processor's diagonal portion 1980 to be its local rows and the corresponding columns (a square submatrix); 1981 each processor's off-diagonal portion encompasses the remainder of the 1982 local matrix (a rectangular submatrix). 1983 1984 The user can specify preallocated storage for the diagonal part of 1985 the local submatrix with either d_nz or d_nnz (not both). Set 1986 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1987 memory allocation. Likewise, specify preallocated storage for the 1988 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1989 1990 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1991 the figure below we depict these three local rows and all columns (0-11). 1992 1993 .vb 1994 0 1 2 3 4 5 6 7 8 9 10 11 1995 ------------------- 1996 row 3 | o o o d d d o o o o o o 1997 row 4 | o o o d d d o o o o o o 1998 row 5 | o o o d d d o o o o o o 1999 ------------------- 2000 .ve 2001 2002 Thus, any entries in the d locations are stored in the d (diagonal) 2003 submatrix, and any entries in the o locations are stored in the 2004 o (off-diagonal) submatrix. Note that the d matrix is stored in 2005 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 2006 2007 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 2008 plus the diagonal part of the d matrix, 2009 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2010 In general, for PDE problems in which most nonzeros are near the diagonal, 2011 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2012 or you will get TERRIBLE performance; see the users' manual chapter on 2013 matrices. 2014 2015 Level: intermediate 2016 2017 .keywords: matrix, block, aij, compressed row, sparse, parallel 2018 2019 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2020 @*/ 2021 2022 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPISBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2023 { 2024 PetscErrorCode ierr; 2025 PetscMPIInt size; 2026 2027 PetscFunctionBegin; 2028 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2029 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2030 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2031 if (size > 1) { 2032 ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr); 2033 ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2034 } else { 2035 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 2036 ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2037 } 2038 PetscFunctionReturn(0); 2039 } 2040 2041 2042 #undef __FUNCT__ 2043 #define __FUNCT__ "MatDuplicate_MPISBAIJ" 2044 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2045 { 2046 Mat mat; 2047 Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data; 2048 PetscErrorCode ierr; 2049 PetscInt len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs; 2050 PetscScalar *array; 2051 2052 PetscFunctionBegin; 2053 *newmat = 0; 2054 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 2055 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2056 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2057 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2058 ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2059 ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2060 2061 mat->factor = matin->factor; 2062 mat->preallocated = PETSC_TRUE; 2063 mat->assembled = PETSC_TRUE; 2064 mat->insertmode = NOT_SET_VALUES; 2065 2066 a = (Mat_MPISBAIJ*)mat->data; 2067 a->bs2 = oldmat->bs2; 2068 a->mbs = oldmat->mbs; 2069 a->nbs = oldmat->nbs; 2070 a->Mbs = oldmat->Mbs; 2071 a->Nbs = oldmat->Nbs; 2072 2073 2074 a->size = oldmat->size; 2075 a->rank = oldmat->rank; 2076 a->donotstash = oldmat->donotstash; 2077 a->roworiented = oldmat->roworiented; 2078 a->rowindices = 0; 2079 a->rowvalues = 0; 2080 a->getrowactive = PETSC_FALSE; 2081 a->barray = 0; 2082 a->rstartbs = oldmat->rstartbs; 2083 a->rendbs = oldmat->rendbs; 2084 a->cstartbs = oldmat->cstartbs; 2085 a->cendbs = oldmat->cendbs; 2086 2087 /* hash table stuff */ 2088 a->ht = 0; 2089 a->hd = 0; 2090 a->ht_size = 0; 2091 a->ht_flag = oldmat->ht_flag; 2092 a->ht_fact = oldmat->ht_fact; 2093 a->ht_total_ct = 0; 2094 a->ht_insert_ct = 0; 2095 2096 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr); 2097 ierr = MatStashCreate_Private(((PetscObject)matin)->comm,1,&mat->stash);CHKERRQ(ierr); 2098 ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 2099 if (oldmat->colmap) { 2100 #if defined (PETSC_USE_CTABLE) 2101 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2102 #else 2103 ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2104 ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2105 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2106 #endif 2107 } else a->colmap = 0; 2108 2109 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2110 ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2111 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2112 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 2113 } else a->garray = 0; 2114 2115 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2116 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2117 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2118 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2119 2120 ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr); 2121 ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr); 2122 ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr); 2123 ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr); 2124 2125 ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr); 2126 ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr); 2127 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr); 2128 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr); 2129 ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr); 2130 ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr); 2131 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr); 2132 ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr); 2133 ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr); 2134 ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr); 2135 ierr = PetscLogObjectParent(mat,a->slvec0b);CHKERRQ(ierr); 2136 ierr = PetscLogObjectParent(mat,a->slvec1a);CHKERRQ(ierr); 2137 ierr = PetscLogObjectParent(mat,a->slvec1b);CHKERRQ(ierr); 2138 2139 /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */ 2140 ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr); 2141 a->sMvctx = oldmat->sMvctx; 2142 ierr = PetscLogObjectParent(mat,a->sMvctx);CHKERRQ(ierr); 2143 2144 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2145 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2146 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2147 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2148 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2149 *newmat = mat; 2150 PetscFunctionReturn(0); 2151 } 2152 2153 #include "petscsys.h" 2154 2155 #undef __FUNCT__ 2156 #define __FUNCT__ "MatLoad_MPISBAIJ" 2157 PetscErrorCode MatLoad_MPISBAIJ(PetscViewer viewer, const MatType type,Mat *newmat) 2158 { 2159 Mat A; 2160 PetscErrorCode ierr; 2161 PetscInt i,nz,j,rstart,rend; 2162 PetscScalar *vals,*buf; 2163 MPI_Comm comm = ((PetscObject)viewer)->comm; 2164 MPI_Status status; 2165 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,*locrowlens,mmbs; 2166 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2167 PetscInt *procsnz = 0,jj,*mycols,*ibuf; 2168 PetscInt bs=1,Mbs,mbs,extra_rows; 2169 PetscInt *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2170 PetscInt dcount,kmax,k,nzcount,tmp; 2171 int fd; 2172 2173 PetscFunctionBegin; 2174 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr); 2175 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 2176 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2177 2178 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2179 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2180 if (!rank) { 2181 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2182 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2183 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2184 if (header[3] < 0) { 2185 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ"); 2186 } 2187 } 2188 2189 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2190 M = header[1]; N = header[2]; 2191 2192 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2193 2194 /* 2195 This code adds extra rows to make sure the number of rows is 2196 divisible by the blocksize 2197 */ 2198 Mbs = M/bs; 2199 extra_rows = bs - M + bs*(Mbs); 2200 if (extra_rows == bs) extra_rows = 0; 2201 else Mbs++; 2202 if (extra_rows &&!rank) { 2203 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 2204 } 2205 2206 /* determine ownership of all rows */ 2207 mbs = Mbs/size + ((Mbs % size) > rank); 2208 m = mbs*bs; 2209 ierr = PetscMalloc2(size+1,PetscMPIInt,&rowners,size+1,PetscMPIInt,&browners);CHKERRQ(ierr); 2210 mmbs = PetscMPIIntCast(mbs); 2211 ierr = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2212 rowners[0] = 0; 2213 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2214 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2215 rstart = rowners[rank]; 2216 rend = rowners[rank+1]; 2217 2218 /* distribute row lengths to all processors */ 2219 ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscMPIInt),&locrowlens);CHKERRQ(ierr); 2220 if (!rank) { 2221 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2222 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2223 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2224 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr); 2225 for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2226 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2227 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2228 } else { 2229 ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2230 } 2231 2232 if (!rank) { /* procs[0] */ 2233 /* calculate the number of nonzeros on each processor */ 2234 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2235 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2236 for (i=0; i<size; i++) { 2237 for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 2238 procsnz[i] += rowlengths[j]; 2239 } 2240 } 2241 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2242 2243 /* determine max buffer needed and allocate it */ 2244 maxnz = 0; 2245 for (i=0; i<size; i++) { 2246 maxnz = PetscMax(maxnz,procsnz[i]); 2247 } 2248 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2249 2250 /* read in my part of the matrix column indices */ 2251 nz = procsnz[0]; 2252 ierr = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 2253 mycols = ibuf; 2254 if (size == 1) nz -= extra_rows; 2255 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2256 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2257 2258 /* read in every ones (except the last) and ship off */ 2259 for (i=1; i<size-1; i++) { 2260 nz = procsnz[i]; 2261 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2262 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2263 } 2264 /* read in the stuff for the last proc */ 2265 if (size != 1) { 2266 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2267 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2268 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2269 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 2270 } 2271 ierr = PetscFree(cols);CHKERRQ(ierr); 2272 } else { /* procs[i], i>0 */ 2273 /* determine buffer space needed for message */ 2274 nz = 0; 2275 for (i=0; i<m; i++) { 2276 nz += locrowlens[i]; 2277 } 2278 ierr = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 2279 mycols = ibuf; 2280 /* receive message of column indices*/ 2281 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2282 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2283 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2284 } 2285 2286 /* loop over local rows, determining number of off diagonal entries */ 2287 ierr = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr); 2288 ierr = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr); 2289 ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2290 ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2291 ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2292 rowcount = 0; 2293 nzcount = 0; 2294 for (i=0; i<mbs; i++) { 2295 dcount = 0; 2296 odcount = 0; 2297 for (j=0; j<bs; j++) { 2298 kmax = locrowlens[rowcount]; 2299 for (k=0; k<kmax; k++) { 2300 tmp = mycols[nzcount++]/bs; /* block col. index */ 2301 if (!mask[tmp]) { 2302 mask[tmp] = 1; 2303 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */ 2304 else masked1[dcount++] = tmp; /* entry in diag portion */ 2305 } 2306 } 2307 rowcount++; 2308 } 2309 2310 dlens[i] = dcount; /* d_nzz[i] */ 2311 odlens[i] = odcount; /* o_nzz[i] */ 2312 2313 /* zero out the mask elements we set */ 2314 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2315 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2316 } 2317 2318 /* create our matrix */ 2319 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 2320 ierr = MatSetSizes(A,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2321 ierr = MatSetType(A,type);CHKERRQ(ierr); 2322 ierr = MatSetOption(A,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 2323 ierr = MatMPISBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr); 2324 2325 if (!rank) { 2326 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2327 /* read in my part of the matrix numerical values */ 2328 nz = procsnz[0]; 2329 vals = buf; 2330 mycols = ibuf; 2331 if (size == 1) nz -= extra_rows; 2332 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2333 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2334 2335 /* insert into matrix */ 2336 jj = rstart*bs; 2337 for (i=0; i<m; i++) { 2338 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2339 mycols += locrowlens[i]; 2340 vals += locrowlens[i]; 2341 jj++; 2342 } 2343 2344 /* read in other processors (except the last one) and ship out */ 2345 for (i=1; i<size-1; i++) { 2346 nz = procsnz[i]; 2347 vals = buf; 2348 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2349 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr); 2350 } 2351 /* the last proc */ 2352 if (size != 1){ 2353 nz = procsnz[i] - extra_rows; 2354 vals = buf; 2355 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2356 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2357 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)A)->tag,comm);CHKERRQ(ierr); 2358 } 2359 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2360 2361 } else { 2362 /* receive numeric values */ 2363 ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2364 2365 /* receive message of values*/ 2366 vals = buf; 2367 mycols = ibuf; 2368 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr); 2369 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2370 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2371 2372 /* insert into matrix */ 2373 jj = rstart*bs; 2374 for (i=0; i<m; i++) { 2375 ierr = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2376 mycols += locrowlens[i]; 2377 vals += locrowlens[i]; 2378 jj++; 2379 } 2380 } 2381 2382 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2383 ierr = PetscFree(buf);CHKERRQ(ierr); 2384 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2385 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 2386 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 2387 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 2388 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2389 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2390 *newmat = A; 2391 PetscFunctionReturn(0); 2392 } 2393 2394 #undef __FUNCT__ 2395 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor" 2396 /*XXXXX@ 2397 MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2398 2399 Input Parameters: 2400 . mat - the matrix 2401 . fact - factor 2402 2403 Collective on Mat 2404 2405 Level: advanced 2406 2407 Notes: 2408 This can also be set by the command line option: -mat_use_hash_table fact 2409 2410 .keywords: matrix, hashtable, factor, HT 2411 2412 .seealso: MatSetOption() 2413 @XXXXX*/ 2414 2415 2416 #undef __FUNCT__ 2417 #define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ" 2418 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[]) 2419 { 2420 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 2421 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(a->B)->data; 2422 PetscReal atmp; 2423 PetscReal *work,*svalues,*rvalues; 2424 PetscErrorCode ierr; 2425 PetscInt i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol; 2426 PetscMPIInt rank,size; 2427 PetscInt *rowners_bs,dest,count,source; 2428 PetscScalar *va; 2429 MatScalar *ba; 2430 MPI_Status stat; 2431 2432 PetscFunctionBegin; 2433 if (idx) SETERRQ(PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov"); 2434 ierr = MatGetRowMaxAbs(a->A,v,PETSC_NULL);CHKERRQ(ierr); 2435 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2436 2437 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 2438 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 2439 2440 bs = A->rmap->bs; 2441 mbs = a->mbs; 2442 Mbs = a->Mbs; 2443 ba = b->a; 2444 bi = b->i; 2445 bj = b->j; 2446 2447 /* find ownerships */ 2448 rowners_bs = A->rmap->range; 2449 2450 /* each proc creates an array to be distributed */ 2451 ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr); 2452 ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr); 2453 2454 /* row_max for B */ 2455 if (rank != size-1){ 2456 for (i=0; i<mbs; i++) { 2457 ncols = bi[1] - bi[0]; bi++; 2458 brow = bs*i; 2459 for (j=0; j<ncols; j++){ 2460 bcol = bs*(*bj); 2461 for (kcol=0; kcol<bs; kcol++){ 2462 col = bcol + kcol; /* local col index */ 2463 col += rowners_bs[rank+1]; /* global col index */ 2464 for (krow=0; krow<bs; krow++){ 2465 atmp = PetscAbsScalar(*ba); ba++; 2466 row = brow + krow; /* local row index */ 2467 if (PetscRealPart(va[row]) < atmp) va[row] = atmp; 2468 if (work[col] < atmp) work[col] = atmp; 2469 } 2470 } 2471 bj++; 2472 } 2473 } 2474 2475 /* send values to its owners */ 2476 for (dest=rank+1; dest<size; dest++){ 2477 svalues = work + rowners_bs[dest]; 2478 count = rowners_bs[dest+1]-rowners_bs[dest]; 2479 ierr = MPI_Send(svalues,count,MPIU_REAL,dest,rank,((PetscObject)A)->comm);CHKERRQ(ierr); 2480 } 2481 } 2482 2483 /* receive values */ 2484 if (rank){ 2485 rvalues = work; 2486 count = rowners_bs[rank+1]-rowners_bs[rank]; 2487 for (source=0; source<rank; source++){ 2488 ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,((PetscObject)A)->comm,&stat);CHKERRQ(ierr); 2489 /* process values */ 2490 for (i=0; i<count; i++){ 2491 if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i]; 2492 } 2493 } 2494 } 2495 2496 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2497 ierr = PetscFree(work);CHKERRQ(ierr); 2498 PetscFunctionReturn(0); 2499 } 2500 2501 #undef __FUNCT__ 2502 #define __FUNCT__ "MatSOR_MPISBAIJ" 2503 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2504 { 2505 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2506 PetscErrorCode ierr; 2507 PetscInt mbs=mat->mbs,bs=matin->rmap->bs; 2508 PetscScalar *x,*b,*ptr,*from; 2509 Vec bb1; 2510 2511 PetscFunctionBegin; 2512 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 2513 if (bs > 1) SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2514 2515 if (flag == SOR_APPLY_UPPER) { 2516 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2517 PetscFunctionReturn(0); 2518 } 2519 2520 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2521 if ( flag & SOR_ZERO_INITIAL_GUESS ) { 2522 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2523 its--; 2524 } 2525 2526 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2527 while (its--){ 2528 2529 /* lower triangular part: slvec0b = - B^T*xx */ 2530 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2531 2532 /* copy xx into slvec0a */ 2533 ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2534 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2535 ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2536 ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2537 2538 ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr); 2539 2540 /* copy bb into slvec1a */ 2541 ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2542 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 2543 ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2544 ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2545 2546 /* set slvec1b = 0 */ 2547 ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr); 2548 2549 ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2550 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2551 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 2552 ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2553 2554 /* upper triangular part: bb1 = bb1 - B*x */ 2555 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr); 2556 2557 /* local diagonal sweep */ 2558 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2559 } 2560 ierr = VecDestroy(bb1);CHKERRQ(ierr); 2561 } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)){ 2562 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2563 } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)){ 2564 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2565 } else if (flag & SOR_EISENSTAT) { 2566 Vec xx1; 2567 PetscTruth hasop; 2568 const PetscScalar *diag; 2569 PetscScalar *sl,scale = (omega - 2.0)/omega; 2570 PetscInt i,n; 2571 2572 if (!mat->xx1) { 2573 ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr); 2574 ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr); 2575 } 2576 xx1 = mat->xx1; 2577 bb1 = mat->bb1; 2578 2579 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 2580 2581 if (!mat->diag) { 2582 /* this is wrong for same matrix with new nonzero values */ 2583 ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr); 2584 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 2585 } 2586 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 2587 2588 if (hasop) { 2589 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 2590 ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr); 2591 } else { 2592 /* 2593 These two lines are replaced by code that may be a bit faster for a good compiler 2594 ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr); 2595 ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr); 2596 */ 2597 ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr); 2598 ierr = VecGetArray(mat->diag,(PetscScalar**)&diag);CHKERRQ(ierr); 2599 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 2600 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2601 ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr); 2602 if (omega == 1.0) { 2603 for (i=0; i<n; i++) { 2604 sl[i] = b[i] - diag[i]*x[i]; 2605 } 2606 ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr); 2607 } else { 2608 for (i=0; i<n; i++) { 2609 sl[i] = b[i] + scale*diag[i]*x[i]; 2610 } 2611 ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr); 2612 } 2613 ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr); 2614 ierr = VecRestoreArray(mat->diag,(PetscScalar**)&diag);CHKERRQ(ierr); 2615 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 2616 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2617 } 2618 2619 /* multiply off-diagonal portion of matrix */ 2620 ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr); 2621 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2622 ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr); 2623 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2624 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2625 ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr); 2626 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2627 ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2628 ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2629 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr); 2630 2631 /* local sweep */ 2632 ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 2633 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 2634 } else { 2635 SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2636 } 2637 PetscFunctionReturn(0); 2638 } 2639 2640 #undef __FUNCT__ 2641 #define __FUNCT__ "MatSOR_MPISBAIJ_2comm" 2642 PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2643 { 2644 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2645 PetscErrorCode ierr; 2646 Vec lvec1,bb1; 2647 2648 PetscFunctionBegin; 2649 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 2650 if (matin->rmap->bs > 1) SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2651 2652 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2653 if ( flag & SOR_ZERO_INITIAL_GUESS ) { 2654 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2655 its--; 2656 } 2657 2658 ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr); 2659 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2660 while (its--){ 2661 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2662 2663 /* lower diagonal part: bb1 = bb - B^T*xx */ 2664 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr); 2665 ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr); 2666 2667 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2668 ierr = VecCopy(bb,bb1);CHKERRQ(ierr); 2669 ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2670 2671 /* upper diagonal part: bb1 = bb1 - B*x */ 2672 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2673 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 2674 2675 ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2676 2677 /* diagonal sweep */ 2678 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2679 } 2680 ierr = VecDestroy(lvec1);CHKERRQ(ierr); 2681 ierr = VecDestroy(bb1);CHKERRQ(ierr); 2682 } else { 2683 SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2684 } 2685 PetscFunctionReturn(0); 2686 } 2687 2688