1 #define PETSCMAT_DLL 2 3 #include "../src/mat/impls/baij/mpi/mpibaij.h" /*I "petscmat.h" I*/ 4 #include "mpisbaij.h" 5 #include "../src/mat/impls/sbaij/seq/sbaij.h" 6 #include "petscblaslapack.h" 7 8 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat); 9 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat); 10 EXTERN PetscErrorCode DisAssemble_MPISBAIJ(Mat); 11 EXTERN PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt); 12 EXTERN PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []); 13 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []); 14 EXTERN PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode); 15 EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 16 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 17 EXTERN PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**); 18 EXTERN PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**); 19 EXTERN PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*); 20 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *); 21 EXTERN PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat,Vec,PetscInt[]); 22 EXTERN PetscErrorCode MatSOR_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec); 23 24 EXTERN_C_BEGIN 25 #undef __FUNCT__ 26 #define __FUNCT__ "MatStoreValues_MPISBAIJ" 27 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPISBAIJ(Mat mat) 28 { 29 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 30 PetscErrorCode ierr; 31 32 PetscFunctionBegin; 33 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 34 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 35 PetscFunctionReturn(0); 36 } 37 EXTERN_C_END 38 39 EXTERN_C_BEGIN 40 #undef __FUNCT__ 41 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ" 42 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPISBAIJ(Mat mat) 43 { 44 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 45 PetscErrorCode ierr; 46 47 PetscFunctionBegin; 48 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 49 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 50 PetscFunctionReturn(0); 51 } 52 EXTERN_C_END 53 54 55 #define CHUNKSIZE 10 56 57 #define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \ 58 { \ 59 \ 60 brow = row/bs; \ 61 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 62 rmax = aimax[brow]; nrow = ailen[brow]; \ 63 bcol = col/bs; \ 64 ridx = row % bs; cidx = col % bs; \ 65 low = 0; high = nrow; \ 66 while (high-low > 3) { \ 67 t = (low+high)/2; \ 68 if (rp[t] > bcol) high = t; \ 69 else low = t; \ 70 } \ 71 for (_i=low; _i<high; _i++) { \ 72 if (rp[_i] > bcol) break; \ 73 if (rp[_i] == bcol) { \ 74 bap = ap + bs2*_i + bs*cidx + ridx; \ 75 if (addv == ADD_VALUES) *bap += value; \ 76 else *bap = value; \ 77 goto a_noinsert; \ 78 } \ 79 } \ 80 if (a->nonew == 1) goto a_noinsert; \ 81 if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 82 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 83 N = nrow++ - 1; \ 84 /* shift up all the later entries in this row */ \ 85 for (ii=N; ii>=_i; ii--) { \ 86 rp[ii+1] = rp[ii]; \ 87 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 88 } \ 89 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 90 rp[_i] = bcol; \ 91 ap[bs2*_i + bs*cidx + ridx] = value; \ 92 a_noinsert:; \ 93 ailen[brow] = nrow; \ 94 } 95 96 #define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \ 97 { \ 98 brow = row/bs; \ 99 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 100 rmax = bimax[brow]; nrow = bilen[brow]; \ 101 bcol = col/bs; \ 102 ridx = row % bs; cidx = col % bs; \ 103 low = 0; high = nrow; \ 104 while (high-low > 3) { \ 105 t = (low+high)/2; \ 106 if (rp[t] > bcol) high = t; \ 107 else low = t; \ 108 } \ 109 for (_i=low; _i<high; _i++) { \ 110 if (rp[_i] > bcol) break; \ 111 if (rp[_i] == bcol) { \ 112 bap = ap + bs2*_i + bs*cidx + ridx; \ 113 if (addv == ADD_VALUES) *bap += value; \ 114 else *bap = value; \ 115 goto b_noinsert; \ 116 } \ 117 } \ 118 if (b->nonew == 1) goto b_noinsert; \ 119 if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 120 MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 121 N = nrow++ - 1; \ 122 /* shift up all the later entries in this row */ \ 123 for (ii=N; ii>=_i; ii--) { \ 124 rp[ii+1] = rp[ii]; \ 125 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 126 } \ 127 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 128 rp[_i] = bcol; \ 129 ap[bs2*_i + bs*cidx + ridx] = value; \ 130 b_noinsert:; \ 131 bilen[brow] = nrow; \ 132 } 133 134 /* Only add/insert a(i,j) with i<=j (blocks). 135 Any a(i,j) with i>j input by user is ingored. 136 */ 137 #undef __FUNCT__ 138 #define __FUNCT__ "MatSetValues_MPISBAIJ" 139 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 140 { 141 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 142 MatScalar value; 143 PetscTruth roworiented = baij->roworiented; 144 PetscErrorCode ierr; 145 PetscInt i,j,row,col; 146 PetscInt rstart_orig=mat->rmap->rstart; 147 PetscInt rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart; 148 PetscInt cend_orig=mat->cmap->rend,bs=mat->rmap->bs; 149 150 /* Some Variables required in the macro */ 151 Mat A = baij->A; 152 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data; 153 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 154 MatScalar *aa=a->a; 155 156 Mat B = baij->B; 157 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 158 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 159 MatScalar *ba=b->a; 160 161 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 162 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 163 MatScalar *ap,*bap; 164 165 /* for stash */ 166 PetscInt n_loc, *in_loc = PETSC_NULL; 167 MatScalar *v_loc = PETSC_NULL; 168 169 PetscFunctionBegin; 170 if (v) PetscValidScalarPointer(v,6); 171 if (!baij->donotstash){ 172 if (n > baij->n_loc) { 173 ierr = PetscFree(baij->in_loc);CHKERRQ(ierr); 174 ierr = PetscFree(baij->v_loc);CHKERRQ(ierr); 175 ierr = PetscMalloc(n*sizeof(PetscInt),&baij->in_loc);CHKERRQ(ierr); 176 ierr = PetscMalloc(n*sizeof(MatScalar),&baij->v_loc);CHKERRQ(ierr); 177 baij->n_loc = n; 178 } 179 in_loc = baij->in_loc; 180 v_loc = baij->v_loc; 181 } 182 183 for (i=0; i<m; i++) { 184 if (im[i] < 0) continue; 185 #if defined(PETSC_USE_DEBUG) 186 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 187 #endif 188 if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */ 189 row = im[i] - rstart_orig; /* local row index */ 190 for (j=0; j<n; j++) { 191 if (im[i]/bs > in[j]/bs){ 192 if (a->ignore_ltriangular){ 193 continue; /* ignore lower triangular blocks */ 194 } else { 195 SETERRQ(PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 196 } 197 } 198 if (in[j] >= cstart_orig && in[j] < cend_orig){ /* diag entry (A) */ 199 col = in[j] - cstart_orig; /* local col index */ 200 brow = row/bs; bcol = col/bs; 201 if (brow > bcol) continue; /* ignore lower triangular blocks of A */ 202 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 203 MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv); 204 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 205 } else if (in[j] < 0) continue; 206 #if defined(PETSC_USE_DEBUG) 207 else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);} 208 #endif 209 else { /* off-diag entry (B) */ 210 if (mat->was_assembled) { 211 if (!baij->colmap) { 212 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 213 } 214 #if defined (PETSC_USE_CTABLE) 215 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 216 col = col - 1; 217 #else 218 col = baij->colmap[in[j]/bs] - 1; 219 #endif 220 if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) { 221 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 222 col = in[j]; 223 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 224 B = baij->B; 225 b = (Mat_SeqBAIJ*)(B)->data; 226 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 227 ba=b->a; 228 } else col += in[j]%bs; 229 } else col = in[j]; 230 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 231 MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv); 232 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 233 } 234 } 235 } else { /* off processor entry */ 236 if (!baij->donotstash) { 237 n_loc = 0; 238 for (j=0; j<n; j++){ 239 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 240 in_loc[n_loc] = in[j]; 241 if (roworiented) { 242 v_loc[n_loc] = v[i*n+j]; 243 } else { 244 v_loc[n_loc] = v[j*m+i]; 245 } 246 n_loc++; 247 } 248 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr); 249 } 250 } 251 } 252 PetscFunctionReturn(0); 253 } 254 255 #undef __FUNCT__ 256 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ" 257 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 258 { 259 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 260 const MatScalar *value; 261 MatScalar *barray=baij->barray; 262 PetscTruth roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular; 263 PetscErrorCode ierr; 264 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 265 PetscInt rend=baij->rendbs,cstart=baij->rstartbs,stepval; 266 PetscInt cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2; 267 268 PetscFunctionBegin; 269 if(!barray) { 270 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 271 baij->barray = barray; 272 } 273 274 if (roworiented) { 275 stepval = (n-1)*bs; 276 } else { 277 stepval = (m-1)*bs; 278 } 279 for (i=0; i<m; i++) { 280 if (im[i] < 0) continue; 281 #if defined(PETSC_USE_DEBUG) 282 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 283 #endif 284 if (im[i] >= rstart && im[i] < rend) { 285 row = im[i] - rstart; 286 for (j=0; j<n; j++) { 287 if (im[i] > in[j]) { 288 if (ignore_ltriangular) continue; /* ignore lower triangular blocks */ 289 else SETERRQ(PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 290 } 291 /* If NumCol = 1 then a copy is not required */ 292 if ((roworiented) && (n == 1)) { 293 barray = (MatScalar*) v + i*bs2; 294 } else if((!roworiented) && (m == 1)) { 295 barray = (MatScalar*) v + j*bs2; 296 } else { /* Here a copy is required */ 297 if (roworiented) { 298 value = v + i*(stepval+bs)*bs + j*bs; 299 } else { 300 value = v + j*(stepval+bs)*bs + i*bs; 301 } 302 for (ii=0; ii<bs; ii++,value+=stepval) { 303 for (jj=0; jj<bs; jj++) { 304 *barray++ = *value++; 305 } 306 } 307 barray -=bs2; 308 } 309 310 if (in[j] >= cstart && in[j] < cend){ 311 col = in[j] - cstart; 312 ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 313 } 314 else if (in[j] < 0) continue; 315 #if defined(PETSC_USE_DEBUG) 316 else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);} 317 #endif 318 else { 319 if (mat->was_assembled) { 320 if (!baij->colmap) { 321 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 322 } 323 324 #if defined(PETSC_USE_DEBUG) 325 #if defined (PETSC_USE_CTABLE) 326 { PetscInt data; 327 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 328 if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 329 } 330 #else 331 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 332 #endif 333 #endif 334 #if defined (PETSC_USE_CTABLE) 335 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 336 col = (col - 1)/bs; 337 #else 338 col = (baij->colmap[in[j]] - 1)/bs; 339 #endif 340 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 341 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 342 col = in[j]; 343 } 344 } 345 else col = in[j]; 346 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 347 } 348 } 349 } else { 350 if (!baij->donotstash) { 351 if (roworiented) { 352 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 353 } else { 354 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 355 } 356 } 357 } 358 } 359 PetscFunctionReturn(0); 360 } 361 362 #undef __FUNCT__ 363 #define __FUNCT__ "MatGetValues_MPISBAIJ" 364 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 365 { 366 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 367 PetscErrorCode ierr; 368 PetscInt bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend; 369 PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data; 370 371 PetscFunctionBegin; 372 for (i=0; i<m; i++) { 373 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */ 374 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 375 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 376 row = idxm[i] - bsrstart; 377 for (j=0; j<n; j++) { 378 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */ 379 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 380 if (idxn[j] >= bscstart && idxn[j] < bscend){ 381 col = idxn[j] - bscstart; 382 ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 383 } else { 384 if (!baij->colmap) { 385 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 386 } 387 #if defined (PETSC_USE_CTABLE) 388 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 389 data --; 390 #else 391 data = baij->colmap[idxn[j]/bs]-1; 392 #endif 393 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 394 else { 395 col = data + idxn[j]%bs; 396 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 397 } 398 } 399 } 400 } else { 401 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 402 } 403 } 404 PetscFunctionReturn(0); 405 } 406 407 #undef __FUNCT__ 408 #define __FUNCT__ "MatNorm_MPISBAIJ" 409 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm) 410 { 411 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 412 PetscErrorCode ierr; 413 PetscReal sum[2],*lnorm2; 414 415 PetscFunctionBegin; 416 if (baij->size == 1) { 417 ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr); 418 } else { 419 if (type == NORM_FROBENIUS) { 420 ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr); 421 ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr); 422 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++; /* squar power of norm(A) */ 423 ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr); 424 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--; /* squar power of norm(B) */ 425 ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 426 *norm = sqrt(sum[0] + 2*sum[1]); 427 ierr = PetscFree(lnorm2);CHKERRQ(ierr); 428 } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */ 429 Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data; 430 Mat_SeqBAIJ *bmat=(Mat_SeqBAIJ*)baij->B->data; 431 PetscReal *rsum,*rsum2,vabs; 432 PetscInt *jj,*garray=baij->garray,rstart=baij->rstartbs,nz; 433 PetscInt brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs; 434 MatScalar *v; 435 436 ierr = PetscMalloc2(mat->cmap->N,PetscReal,&rsum,mat->cmap->N,PetscReal,&rsum2);CHKERRQ(ierr); 437 ierr = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 438 /* Amat */ 439 v = amat->a; jj = amat->j; 440 for (brow=0; brow<mbs; brow++) { 441 grow = bs*(rstart + brow); 442 nz = amat->i[brow+1] - amat->i[brow]; 443 for (bcol=0; bcol<nz; bcol++){ 444 gcol = bs*(rstart + *jj); jj++; 445 for (col=0; col<bs; col++){ 446 for (row=0; row<bs; row++){ 447 vabs = PetscAbsScalar(*v); v++; 448 rsum[gcol+col] += vabs; 449 /* non-diagonal block */ 450 if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs; 451 } 452 } 453 } 454 } 455 /* Bmat */ 456 v = bmat->a; jj = bmat->j; 457 for (brow=0; brow<mbs; brow++) { 458 grow = bs*(rstart + brow); 459 nz = bmat->i[brow+1] - bmat->i[brow]; 460 for (bcol=0; bcol<nz; bcol++){ 461 gcol = bs*garray[*jj]; jj++; 462 for (col=0; col<bs; col++){ 463 for (row=0; row<bs; row++){ 464 vabs = PetscAbsScalar(*v); v++; 465 rsum[gcol+col] += vabs; 466 rsum[grow+row] += vabs; 467 } 468 } 469 } 470 } 471 ierr = MPI_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 472 *norm = 0.0; 473 for (col=0; col<mat->cmap->N; col++) { 474 if (rsum2[col] > *norm) *norm = rsum2[col]; 475 } 476 ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr); 477 } else { 478 SETERRQ(PETSC_ERR_SUP,"No support for this norm yet"); 479 } 480 } 481 PetscFunctionReturn(0); 482 } 483 484 #undef __FUNCT__ 485 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ" 486 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode) 487 { 488 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 489 PetscErrorCode ierr; 490 PetscInt nstash,reallocs; 491 InsertMode addv; 492 493 PetscFunctionBegin; 494 if (baij->donotstash) { 495 PetscFunctionReturn(0); 496 } 497 498 /* make sure all processors are either in INSERTMODE or ADDMODE */ 499 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr); 500 if (addv == (ADD_VALUES|INSERT_VALUES)) { 501 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 502 } 503 mat->insertmode = addv; /* in case this processor had no cache */ 504 505 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 506 ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr); 507 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 508 ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 509 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 510 ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 511 PetscFunctionReturn(0); 512 } 513 514 #undef __FUNCT__ 515 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ" 516 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode) 517 { 518 Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data; 519 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)baij->A->data; 520 PetscErrorCode ierr; 521 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 522 PetscInt *row,*col; 523 PetscTruth other_disassembled; 524 PetscMPIInt n; 525 PetscTruth r1,r2,r3; 526 MatScalar *val; 527 InsertMode addv = mat->insertmode; 528 529 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 530 PetscFunctionBegin; 531 532 if (!baij->donotstash) { 533 while (1) { 534 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 535 if (!flg) break; 536 537 for (i=0; i<n;) { 538 /* Now identify the consecutive vals belonging to the same row */ 539 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 540 if (j < n) ncols = j-i; 541 else ncols = n-i; 542 /* Now assemble all these values with a single function call */ 543 ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 544 i = j; 545 } 546 } 547 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 548 /* Now process the block-stash. Since the values are stashed column-oriented, 549 set the roworiented flag to column oriented, and after MatSetValues() 550 restore the original flags */ 551 r1 = baij->roworiented; 552 r2 = a->roworiented; 553 r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented; 554 baij->roworiented = PETSC_FALSE; 555 a->roworiented = PETSC_FALSE; 556 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworinted */ 557 while (1) { 558 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 559 if (!flg) break; 560 561 for (i=0; i<n;) { 562 /* Now identify the consecutive vals belonging to the same row */ 563 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 564 if (j < n) ncols = j-i; 565 else ncols = n-i; 566 ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 567 i = j; 568 } 569 } 570 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 571 baij->roworiented = r1; 572 a->roworiented = r2; 573 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworinted */ 574 } 575 576 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 577 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 578 579 /* determine if any processor has disassembled, if so we must 580 also disassemble ourselfs, in order that we may reassemble. */ 581 /* 582 if nonzero structure of submatrix B cannot change then we know that 583 no processor disassembled thus we can skip this stuff 584 */ 585 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 586 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr); 587 if (mat->was_assembled && !other_disassembled) { 588 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 589 } 590 } 591 592 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 593 ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */ 594 } 595 ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */ 596 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 597 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 598 599 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 600 baij->rowvalues = 0; 601 602 PetscFunctionReturn(0); 603 } 604 605 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 606 #undef __FUNCT__ 607 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket" 608 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 609 { 610 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 611 PetscErrorCode ierr; 612 PetscInt bs = mat->rmap->bs; 613 PetscMPIInt size = baij->size,rank = baij->rank; 614 PetscTruth iascii,isdraw; 615 PetscViewer sviewer; 616 PetscViewerFormat format; 617 618 PetscFunctionBegin; 619 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 620 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 621 if (iascii) { 622 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 623 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 624 MatInfo info; 625 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 626 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 627 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n", 628 rank,mat->rmap->N,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs, 629 mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr); 630 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 631 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr); 632 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 633 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr); 634 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 635 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 636 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 637 PetscFunctionReturn(0); 638 } else if (format == PETSC_VIEWER_ASCII_INFO) { 639 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 640 PetscFunctionReturn(0); 641 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 642 PetscFunctionReturn(0); 643 } 644 } 645 646 if (isdraw) { 647 PetscDraw draw; 648 PetscTruth isnull; 649 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 650 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 651 } 652 653 if (size == 1) { 654 ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 655 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 656 } else { 657 /* assemble the entire matrix onto first processor. */ 658 Mat A; 659 Mat_SeqSBAIJ *Aloc; 660 Mat_SeqBAIJ *Bloc; 661 PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 662 MatScalar *a; 663 664 /* Should this be the same type as mat? */ 665 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 666 if (!rank) { 667 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 668 } else { 669 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 670 } 671 ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 672 ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 673 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 674 675 /* copy over the A part */ 676 Aloc = (Mat_SeqSBAIJ*)baij->A->data; 677 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 678 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 679 680 for (i=0; i<mbs; i++) { 681 rvals[0] = bs*(baij->rstartbs + i); 682 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 683 for (j=ai[i]; j<ai[i+1]; j++) { 684 col = (baij->cstartbs+aj[j])*bs; 685 for (k=0; k<bs; k++) { 686 ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 687 col++; a += bs; 688 } 689 } 690 } 691 /* copy over the B part */ 692 Bloc = (Mat_SeqBAIJ*)baij->B->data; 693 ai = Bloc->i; aj = Bloc->j; a = Bloc->a; 694 for (i=0; i<mbs; i++) { 695 696 rvals[0] = bs*(baij->rstartbs + i); 697 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 698 for (j=ai[i]; j<ai[i+1]; j++) { 699 col = baij->garray[aj[j]]*bs; 700 for (k=0; k<bs; k++) { 701 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 702 col++; a += bs; 703 } 704 } 705 } 706 ierr = PetscFree(rvals);CHKERRQ(ierr); 707 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 708 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 709 /* 710 Everyone has to call to draw the matrix since the graphics waits are 711 synchronized across all processors that share the PetscDraw object 712 */ 713 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 714 if (!rank) { 715 ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 716 ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 717 } 718 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 719 ierr = MatDestroy(A);CHKERRQ(ierr); 720 } 721 PetscFunctionReturn(0); 722 } 723 724 #undef __FUNCT__ 725 #define __FUNCT__ "MatView_MPISBAIJ" 726 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer) 727 { 728 PetscErrorCode ierr; 729 PetscTruth iascii,isdraw,issocket,isbinary; 730 731 PetscFunctionBegin; 732 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 733 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 734 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 735 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 736 if (iascii || isdraw || issocket || isbinary) { 737 ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 738 } else { 739 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name); 740 } 741 PetscFunctionReturn(0); 742 } 743 744 #undef __FUNCT__ 745 #define __FUNCT__ "MatDestroy_MPISBAIJ" 746 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat) 747 { 748 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 749 PetscErrorCode ierr; 750 751 PetscFunctionBegin; 752 #if defined(PETSC_USE_LOG) 753 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N); 754 #endif 755 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 756 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 757 ierr = MatDestroy(baij->A);CHKERRQ(ierr); 758 ierr = MatDestroy(baij->B);CHKERRQ(ierr); 759 #if defined (PETSC_USE_CTABLE) 760 if (baij->colmap) {ierr = PetscTableDestroy(baij->colmap);CHKERRQ(ierr);} 761 #else 762 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 763 #endif 764 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 765 if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 766 if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 767 if (baij->slvec0) { 768 ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr); 769 ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr); 770 } 771 if (baij->slvec1) { 772 ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr); 773 ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr); 774 ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr); 775 } 776 if (baij->sMvctx) {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);} 777 ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr); 778 ierr = PetscFree(baij->barray);CHKERRQ(ierr); 779 ierr = PetscFree(baij->hd);CHKERRQ(ierr); 780 if (baij->diag) {ierr = VecDestroy(baij->diag);CHKERRQ(ierr);} 781 if (baij->bb1) {ierr = VecDestroy(baij->bb1);CHKERRQ(ierr);} 782 if (baij->xx1) {ierr = VecDestroy(baij->xx1);CHKERRQ(ierr);} 783 #if defined(PETSC_USE_SCALAR_MAT_SINGLE) 784 ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr); 785 #endif 786 ierr = PetscFree(baij->in_loc);CHKERRQ(ierr); 787 ierr = PetscFree(baij->v_loc);CHKERRQ(ierr); 788 ierr = PetscFree(baij->rangebs);CHKERRQ(ierr); 789 ierr = PetscFree(baij);CHKERRQ(ierr); 790 791 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 792 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 793 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 794 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 795 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 796 PetscFunctionReturn(0); 797 } 798 799 #undef __FUNCT__ 800 #define __FUNCT__ "MatMult_MPISBAIJ_Hermitian" 801 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy) 802 { 803 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 804 PetscErrorCode ierr; 805 PetscInt nt,mbs=a->mbs,bs=A->rmap->bs; 806 PetscScalar *x,*from; 807 808 PetscFunctionBegin; 809 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 810 if (nt != A->cmap->n) { 811 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 812 } 813 814 /* diagonal part */ 815 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 816 ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr); 817 818 /* subdiagonal part */ 819 ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 820 821 /* copy x into the vec slvec0 */ 822 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 823 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 824 825 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 826 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 827 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 828 829 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 830 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 831 /* supperdiagonal part */ 832 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 833 PetscFunctionReturn(0); 834 } 835 836 #undef __FUNCT__ 837 #define __FUNCT__ "MatMult_MPISBAIJ" 838 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy) 839 { 840 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 841 PetscErrorCode ierr; 842 PetscInt nt,mbs=a->mbs,bs=A->rmap->bs; 843 PetscScalar *x,*from; 844 845 PetscFunctionBegin; 846 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 847 if (nt != A->cmap->n) { 848 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 849 } 850 851 /* diagonal part */ 852 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 853 ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr); 854 855 /* subdiagonal part */ 856 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 857 858 /* copy x into the vec slvec0 */ 859 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 860 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 861 862 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 863 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 864 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 865 866 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 867 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 868 /* supperdiagonal part */ 869 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 870 PetscFunctionReturn(0); 871 } 872 873 #undef __FUNCT__ 874 #define __FUNCT__ "MatMult_MPISBAIJ_2comm" 875 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy) 876 { 877 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 878 PetscErrorCode ierr; 879 PetscInt nt; 880 881 PetscFunctionBegin; 882 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 883 if (nt != A->cmap->n) { 884 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 885 } 886 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 887 if (nt != A->rmap->N) { 888 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 889 } 890 891 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 892 /* do diagonal part */ 893 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 894 /* do supperdiagonal part */ 895 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 896 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 897 /* do subdiagonal part */ 898 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 899 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 900 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 901 902 PetscFunctionReturn(0); 903 } 904 905 #undef __FUNCT__ 906 #define __FUNCT__ "MatMultAdd_MPISBAIJ" 907 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 908 { 909 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 910 PetscErrorCode ierr; 911 PetscInt mbs=a->mbs,bs=A->rmap->bs; 912 PetscScalar *x,*from,zero=0.0; 913 914 PetscFunctionBegin; 915 /* 916 PetscSynchronizedPrintf(((PetscObject)A)->comm," MatMultAdd is called ...\n"); 917 PetscSynchronizedFlush(((PetscObject)A)->comm); 918 */ 919 /* diagonal part */ 920 ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr); 921 ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr); 922 923 /* subdiagonal part */ 924 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 925 926 /* copy x into the vec slvec0 */ 927 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 928 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 929 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 930 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 931 932 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 933 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 934 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 935 936 /* supperdiagonal part */ 937 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr); 938 939 PetscFunctionReturn(0); 940 } 941 942 #undef __FUNCT__ 943 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm" 944 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz) 945 { 946 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 947 PetscErrorCode ierr; 948 949 PetscFunctionBegin; 950 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 951 /* do diagonal part */ 952 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 953 /* do supperdiagonal part */ 954 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 955 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 956 957 /* do subdiagonal part */ 958 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 959 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 960 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 961 962 PetscFunctionReturn(0); 963 } 964 965 /* 966 This only works correctly for square matrices where the subblock A->A is the 967 diagonal block 968 */ 969 #undef __FUNCT__ 970 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ" 971 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v) 972 { 973 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 974 PetscErrorCode ierr; 975 976 PetscFunctionBegin; 977 /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */ 978 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 979 PetscFunctionReturn(0); 980 } 981 982 #undef __FUNCT__ 983 #define __FUNCT__ "MatScale_MPISBAIJ" 984 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa) 985 { 986 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 987 PetscErrorCode ierr; 988 989 PetscFunctionBegin; 990 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 991 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 992 PetscFunctionReturn(0); 993 } 994 995 #undef __FUNCT__ 996 #define __FUNCT__ "MatGetRow_MPISBAIJ" 997 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 998 { 999 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 1000 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1001 PetscErrorCode ierr; 1002 PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1003 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend; 1004 PetscInt *cmap,*idx_p,cstart = mat->rstartbs; 1005 1006 PetscFunctionBegin; 1007 if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1008 mat->getrowactive = PETSC_TRUE; 1009 1010 if (!mat->rowvalues && (idx || v)) { 1011 /* 1012 allocate enough space to hold information from the longest row. 1013 */ 1014 Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data; 1015 Mat_SeqBAIJ *Ba = (Mat_SeqBAIJ*)mat->B->data; 1016 PetscInt max = 1,mbs = mat->mbs,tmp; 1017 for (i=0; i<mbs; i++) { 1018 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */ 1019 if (max < tmp) { max = tmp; } 1020 } 1021 ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1022 } 1023 1024 if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows") 1025 lrow = row - brstart; /* local row index */ 1026 1027 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1028 if (!v) {pvA = 0; pvB = 0;} 1029 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1030 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1031 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1032 nztot = nzA + nzB; 1033 1034 cmap = mat->garray; 1035 if (v || idx) { 1036 if (nztot) { 1037 /* Sort by increasing column numbers, assuming A and B already sorted */ 1038 PetscInt imark = -1; 1039 if (v) { 1040 *v = v_p = mat->rowvalues; 1041 for (i=0; i<nzB; i++) { 1042 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1043 else break; 1044 } 1045 imark = i; 1046 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1047 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1048 } 1049 if (idx) { 1050 *idx = idx_p = mat->rowindices; 1051 if (imark > -1) { 1052 for (i=0; i<imark; i++) { 1053 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1054 } 1055 } else { 1056 for (i=0; i<nzB; i++) { 1057 if (cmap[cworkB[i]/bs] < cstart) 1058 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1059 else break; 1060 } 1061 imark = i; 1062 } 1063 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1064 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1065 } 1066 } else { 1067 if (idx) *idx = 0; 1068 if (v) *v = 0; 1069 } 1070 } 1071 *nz = nztot; 1072 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1073 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1074 PetscFunctionReturn(0); 1075 } 1076 1077 #undef __FUNCT__ 1078 #define __FUNCT__ "MatRestoreRow_MPISBAIJ" 1079 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1080 { 1081 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1082 1083 PetscFunctionBegin; 1084 if (!baij->getrowactive) { 1085 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1086 } 1087 baij->getrowactive = PETSC_FALSE; 1088 PetscFunctionReturn(0); 1089 } 1090 1091 #undef __FUNCT__ 1092 #define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ" 1093 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A) 1094 { 1095 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1096 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1097 1098 PetscFunctionBegin; 1099 aA->getrow_utriangular = PETSC_TRUE; 1100 PetscFunctionReturn(0); 1101 } 1102 #undef __FUNCT__ 1103 #define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ" 1104 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A) 1105 { 1106 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1107 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1108 1109 PetscFunctionBegin; 1110 aA->getrow_utriangular = PETSC_FALSE; 1111 PetscFunctionReturn(0); 1112 } 1113 1114 #undef __FUNCT__ 1115 #define __FUNCT__ "MatRealPart_MPISBAIJ" 1116 PetscErrorCode MatRealPart_MPISBAIJ(Mat A) 1117 { 1118 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1119 PetscErrorCode ierr; 1120 1121 PetscFunctionBegin; 1122 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1123 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1124 PetscFunctionReturn(0); 1125 } 1126 1127 #undef __FUNCT__ 1128 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ" 1129 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A) 1130 { 1131 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1132 PetscErrorCode ierr; 1133 1134 PetscFunctionBegin; 1135 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1136 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1137 PetscFunctionReturn(0); 1138 } 1139 1140 #undef __FUNCT__ 1141 #define __FUNCT__ "MatZeroEntries_MPISBAIJ" 1142 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A) 1143 { 1144 Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data; 1145 PetscErrorCode ierr; 1146 1147 PetscFunctionBegin; 1148 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1149 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1150 PetscFunctionReturn(0); 1151 } 1152 1153 #undef __FUNCT__ 1154 #define __FUNCT__ "MatGetInfo_MPISBAIJ" 1155 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1156 { 1157 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data; 1158 Mat A = a->A,B = a->B; 1159 PetscErrorCode ierr; 1160 PetscReal isend[5],irecv[5]; 1161 1162 PetscFunctionBegin; 1163 info->block_size = (PetscReal)matin->rmap->bs; 1164 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1165 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1166 isend[3] = info->memory; isend[4] = info->mallocs; 1167 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1168 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1169 isend[3] += info->memory; isend[4] += info->mallocs; 1170 if (flag == MAT_LOCAL) { 1171 info->nz_used = isend[0]; 1172 info->nz_allocated = isend[1]; 1173 info->nz_unneeded = isend[2]; 1174 info->memory = isend[3]; 1175 info->mallocs = isend[4]; 1176 } else if (flag == MAT_GLOBAL_MAX) { 1177 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1178 info->nz_used = irecv[0]; 1179 info->nz_allocated = irecv[1]; 1180 info->nz_unneeded = irecv[2]; 1181 info->memory = irecv[3]; 1182 info->mallocs = irecv[4]; 1183 } else if (flag == MAT_GLOBAL_SUM) { 1184 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1185 info->nz_used = irecv[0]; 1186 info->nz_allocated = irecv[1]; 1187 info->nz_unneeded = irecv[2]; 1188 info->memory = irecv[3]; 1189 info->mallocs = irecv[4]; 1190 } else { 1191 SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1192 } 1193 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1194 info->fill_ratio_needed = 0; 1195 info->factor_mallocs = 0; 1196 PetscFunctionReturn(0); 1197 } 1198 1199 #undef __FUNCT__ 1200 #define __FUNCT__ "MatSetOption_MPISBAIJ" 1201 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscTruth flg) 1202 { 1203 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1204 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1205 PetscErrorCode ierr; 1206 1207 PetscFunctionBegin; 1208 switch (op) { 1209 case MAT_NEW_NONZERO_LOCATIONS: 1210 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1211 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1212 case MAT_KEEP_NONZERO_PATTERN: 1213 case MAT_NEW_NONZERO_LOCATION_ERR: 1214 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1215 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1216 break; 1217 case MAT_ROW_ORIENTED: 1218 a->roworiented = flg; 1219 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1220 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1221 break; 1222 case MAT_NEW_DIAGONALS: 1223 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1224 break; 1225 case MAT_IGNORE_OFF_PROC_ENTRIES: 1226 a->donotstash = flg; 1227 break; 1228 case MAT_USE_HASH_TABLE: 1229 a->ht_flag = flg; 1230 break; 1231 case MAT_HERMITIAN: 1232 if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first"); 1233 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1234 A->ops->mult = MatMult_MPISBAIJ_Hermitian; 1235 break; 1236 case MAT_SYMMETRIC: 1237 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1238 break; 1239 case MAT_STRUCTURALLY_SYMMETRIC: 1240 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1241 break; 1242 case MAT_SYMMETRY_ETERNAL: 1243 if (!flg) SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric"); 1244 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1245 break; 1246 case MAT_IGNORE_LOWER_TRIANGULAR: 1247 aA->ignore_ltriangular = flg; 1248 break; 1249 case MAT_ERROR_LOWER_TRIANGULAR: 1250 aA->ignore_ltriangular = flg; 1251 break; 1252 case MAT_GETROW_UPPERTRIANGULAR: 1253 aA->getrow_utriangular = flg; 1254 break; 1255 default: 1256 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 1257 } 1258 PetscFunctionReturn(0); 1259 } 1260 1261 #undef __FUNCT__ 1262 #define __FUNCT__ "MatTranspose_MPISBAIJ" 1263 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B) 1264 { 1265 PetscErrorCode ierr; 1266 PetscFunctionBegin; 1267 if (MAT_INITIAL_MATRIX || *B != A) { 1268 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 1269 } 1270 PetscFunctionReturn(0); 1271 } 1272 1273 #undef __FUNCT__ 1274 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ" 1275 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr) 1276 { 1277 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1278 Mat a=baij->A, b=baij->B; 1279 PetscErrorCode ierr; 1280 PetscInt nv,m,n; 1281 PetscTruth flg; 1282 1283 PetscFunctionBegin; 1284 if (ll != rr){ 1285 ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr); 1286 if (!flg) 1287 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n"); 1288 } 1289 if (!ll) PetscFunctionReturn(0); 1290 1291 ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr); 1292 if (m != n) SETERRQ2(PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n); 1293 1294 ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr); 1295 if (nv!=n) SETERRQ(PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size"); 1296 1297 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1298 1299 /* left diagonalscale the off-diagonal part */ 1300 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1301 1302 /* scale the diagonal part */ 1303 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1304 1305 /* right diagonalscale the off-diagonal part */ 1306 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1307 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1308 PetscFunctionReturn(0); 1309 } 1310 1311 #undef __FUNCT__ 1312 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ" 1313 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A) 1314 { 1315 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1316 PetscErrorCode ierr; 1317 1318 PetscFunctionBegin; 1319 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1320 PetscFunctionReturn(0); 1321 } 1322 1323 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *); 1324 1325 #undef __FUNCT__ 1326 #define __FUNCT__ "MatEqual_MPISBAIJ" 1327 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag) 1328 { 1329 Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data; 1330 Mat a,b,c,d; 1331 PetscTruth flg; 1332 PetscErrorCode ierr; 1333 1334 PetscFunctionBegin; 1335 a = matA->A; b = matA->B; 1336 c = matB->A; d = matB->B; 1337 1338 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1339 if (flg) { 1340 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1341 } 1342 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1343 PetscFunctionReturn(0); 1344 } 1345 1346 #undef __FUNCT__ 1347 #define __FUNCT__ "MatCopy_MPISBAIJ" 1348 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str) 1349 { 1350 PetscErrorCode ierr; 1351 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1352 Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data; 1353 1354 PetscFunctionBegin; 1355 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1356 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1357 ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr); 1358 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1359 ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr); 1360 } else { 1361 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1362 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1363 } 1364 PetscFunctionReturn(0); 1365 } 1366 1367 #undef __FUNCT__ 1368 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ" 1369 PetscErrorCode MatSetUpPreallocation_MPISBAIJ(Mat A) 1370 { 1371 PetscErrorCode ierr; 1372 1373 PetscFunctionBegin; 1374 ierr = MatMPISBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1375 PetscFunctionReturn(0); 1376 } 1377 1378 #undef __FUNCT__ 1379 #define __FUNCT__ "MatAXPY_MPISBAIJ" 1380 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1381 { 1382 PetscErrorCode ierr; 1383 Mat_MPISBAIJ *xx=(Mat_MPISBAIJ *)X->data,*yy=(Mat_MPISBAIJ *)Y->data; 1384 PetscBLASInt bnz,one=1; 1385 Mat_SeqSBAIJ *xa,*ya; 1386 Mat_SeqBAIJ *xb,*yb; 1387 1388 PetscFunctionBegin; 1389 if (str == SAME_NONZERO_PATTERN) { 1390 PetscScalar alpha = a; 1391 xa = (Mat_SeqSBAIJ *)xx->A->data; 1392 ya = (Mat_SeqSBAIJ *)yy->A->data; 1393 bnz = PetscBLASIntCast(xa->nz); 1394 BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one); 1395 xb = (Mat_SeqBAIJ *)xx->B->data; 1396 yb = (Mat_SeqBAIJ *)yy->B->data; 1397 bnz = PetscBLASIntCast(xb->nz); 1398 BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one); 1399 } else { 1400 ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr); 1401 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1402 ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr); 1403 } 1404 PetscFunctionReturn(0); 1405 } 1406 1407 #undef __FUNCT__ 1408 #define __FUNCT__ "MatSetBlockSize_MPISBAIJ" 1409 PetscErrorCode MatSetBlockSize_MPISBAIJ(Mat A,PetscInt bs) 1410 { 1411 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1412 PetscInt rbs,cbs; 1413 PetscErrorCode ierr; 1414 1415 PetscFunctionBegin; 1416 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1417 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1418 ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr); 1419 ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr); 1420 if (rbs != bs) SETERRQ2(PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with SBAIJ %d",bs,rbs); 1421 if (cbs != bs) SETERRQ2(PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with SBAIJ %d",bs,cbs); 1422 PetscFunctionReturn(0); 1423 } 1424 1425 #undef __FUNCT__ 1426 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ" 1427 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[]) 1428 { 1429 PetscErrorCode ierr; 1430 PetscInt i; 1431 PetscTruth flg; 1432 1433 PetscFunctionBegin; 1434 for (i=0; i<n; i++) { 1435 ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr); 1436 if (!flg) { 1437 SETERRQ(PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices"); 1438 } 1439 } 1440 ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr); 1441 PetscFunctionReturn(0); 1442 } 1443 1444 1445 /* -------------------------------------------------------------------*/ 1446 static struct _MatOps MatOps_Values = { 1447 MatSetValues_MPISBAIJ, 1448 MatGetRow_MPISBAIJ, 1449 MatRestoreRow_MPISBAIJ, 1450 MatMult_MPISBAIJ, 1451 /* 4*/ MatMultAdd_MPISBAIJ, 1452 MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */ 1453 MatMultAdd_MPISBAIJ, 1454 0, 1455 0, 1456 0, 1457 /*10*/ 0, 1458 0, 1459 0, 1460 MatSOR_MPISBAIJ, 1461 MatTranspose_MPISBAIJ, 1462 /*15*/ MatGetInfo_MPISBAIJ, 1463 MatEqual_MPISBAIJ, 1464 MatGetDiagonal_MPISBAIJ, 1465 MatDiagonalScale_MPISBAIJ, 1466 MatNorm_MPISBAIJ, 1467 /*20*/ MatAssemblyBegin_MPISBAIJ, 1468 MatAssemblyEnd_MPISBAIJ, 1469 MatSetOption_MPISBAIJ, 1470 MatZeroEntries_MPISBAIJ, 1471 /*24*/ 0, 1472 0, 1473 0, 1474 0, 1475 0, 1476 /*29*/ MatSetUpPreallocation_MPISBAIJ, 1477 0, 1478 0, 1479 0, 1480 0, 1481 /*34*/ MatDuplicate_MPISBAIJ, 1482 0, 1483 0, 1484 0, 1485 0, 1486 /*39*/ MatAXPY_MPISBAIJ, 1487 MatGetSubMatrices_MPISBAIJ, 1488 MatIncreaseOverlap_MPISBAIJ, 1489 MatGetValues_MPISBAIJ, 1490 MatCopy_MPISBAIJ, 1491 /*44*/ 0, 1492 MatScale_MPISBAIJ, 1493 0, 1494 0, 1495 0, 1496 /*49*/ MatSetBlockSize_MPISBAIJ, 1497 0, 1498 0, 1499 0, 1500 0, 1501 /*54*/ 0, 1502 0, 1503 MatSetUnfactored_MPISBAIJ, 1504 0, 1505 MatSetValuesBlocked_MPISBAIJ, 1506 /*59*/ 0, 1507 0, 1508 0, 1509 0, 1510 0, 1511 /*64*/ 0, 1512 0, 1513 0, 1514 0, 1515 0, 1516 /*69*/ MatGetRowMaxAbs_MPISBAIJ, 1517 0, 1518 0, 1519 0, 1520 0, 1521 /*74*/ 0, 1522 0, 1523 0, 1524 0, 1525 0, 1526 /*79*/ 0, 1527 0, 1528 0, 1529 0, 1530 MatLoad_MPISBAIJ, 1531 /*84*/ 0, 1532 0, 1533 0, 1534 0, 1535 0, 1536 /*89*/ 0, 1537 0, 1538 0, 1539 0, 1540 0, 1541 /*94*/ 0, 1542 0, 1543 0, 1544 0, 1545 0, 1546 /*99*/ 0, 1547 0, 1548 0, 1549 0, 1550 0, 1551 /*104*/0, 1552 MatRealPart_MPISBAIJ, 1553 MatImaginaryPart_MPISBAIJ, 1554 MatGetRowUpperTriangular_MPISBAIJ, 1555 MatRestoreRowUpperTriangular_MPISBAIJ 1556 }; 1557 1558 1559 EXTERN_C_BEGIN 1560 #undef __FUNCT__ 1561 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ" 1562 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 1563 { 1564 PetscFunctionBegin; 1565 *a = ((Mat_MPISBAIJ *)A->data)->A; 1566 *iscopy = PETSC_FALSE; 1567 PetscFunctionReturn(0); 1568 } 1569 EXTERN_C_END 1570 1571 EXTERN_C_BEGIN 1572 #undef __FUNCT__ 1573 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ" 1574 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz) 1575 { 1576 Mat_MPISBAIJ *b; 1577 PetscErrorCode ierr; 1578 PetscInt i,mbs,Mbs,newbs = PetscAbs(bs); 1579 1580 PetscFunctionBegin; 1581 if (bs < 0){ 1582 ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPISBAIJ matrix","Mat");CHKERRQ(ierr); 1583 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr); 1584 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1585 bs = PetscAbs(bs); 1586 } 1587 if ((d_nnz || o_nnz) && newbs != bs) { 1588 SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz"); 1589 } 1590 bs = newbs; 1591 1592 if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3; 1593 if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1; 1594 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 1595 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 1596 1597 B->rmap->bs = B->cmap->bs = bs; 1598 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 1599 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 1600 1601 if (d_nnz) { 1602 for (i=0; i<B->rmap->n/bs; i++) { 1603 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 1604 } 1605 } 1606 if (o_nnz) { 1607 for (i=0; i<B->rmap->n/bs; i++) { 1608 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 1609 } 1610 } 1611 1612 b = (Mat_MPISBAIJ*)B->data; 1613 mbs = B->rmap->n/bs; 1614 Mbs = B->rmap->N/bs; 1615 if (mbs*bs != B->rmap->n) { 1616 SETERRQ2(PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->N,bs); 1617 } 1618 1619 B->rmap->bs = bs; 1620 b->bs2 = bs*bs; 1621 b->mbs = mbs; 1622 b->nbs = mbs; 1623 b->Mbs = Mbs; 1624 b->Nbs = Mbs; 1625 1626 for (i=0; i<=b->size; i++) { 1627 b->rangebs[i] = B->rmap->range[i]/bs; 1628 } 1629 b->rstartbs = B->rmap->rstart/bs; 1630 b->rendbs = B->rmap->rend/bs; 1631 1632 b->cstartbs = B->cmap->rstart/bs; 1633 b->cendbs = B->cmap->rend/bs; 1634 1635 if (!B->preallocated) { 1636 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 1637 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 1638 ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr); 1639 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 1640 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 1641 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 1642 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 1643 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 1644 ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr); 1645 } 1646 1647 ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 1648 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 1649 B->preallocated = PETSC_TRUE; 1650 PetscFunctionReturn(0); 1651 } 1652 EXTERN_C_END 1653 1654 EXTERN_C_BEGIN 1655 #if defined(PETSC_HAVE_MUMPS) 1656 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_mpisbaij_mumps(Mat,MatFactorType,Mat*); 1657 #endif 1658 #if defined(PETSC_HAVE_SPOOLES) 1659 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_mpisbaij_spooles(Mat,MatFactorType,Mat*); 1660 #endif 1661 #if defined(PETSC_HAVE_PASTIX) 1662 extern PetscErrorCode MatGetFactor_mpisbaij_pastix(Mat,MatFactorType,Mat*); 1663 #endif 1664 EXTERN_C_END 1665 1666 /*MC 1667 MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices, 1668 based on block compressed sparse row format. Only the upper triangular portion of the matrix is stored. 1669 1670 Options Database Keys: 1671 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions() 1672 1673 Level: beginner 1674 1675 .seealso: MatCreateMPISBAIJ 1676 M*/ 1677 1678 EXTERN_C_BEGIN 1679 #undef __FUNCT__ 1680 #define __FUNCT__ "MatCreate_MPISBAIJ" 1681 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPISBAIJ(Mat B) 1682 { 1683 Mat_MPISBAIJ *b; 1684 PetscErrorCode ierr; 1685 PetscTruth flg; 1686 1687 PetscFunctionBegin; 1688 1689 ierr = PetscNewLog(B,Mat_MPISBAIJ,&b);CHKERRQ(ierr); 1690 B->data = (void*)b; 1691 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1692 1693 B->ops->destroy = MatDestroy_MPISBAIJ; 1694 B->ops->view = MatView_MPISBAIJ; 1695 B->mapping = 0; 1696 B->assembled = PETSC_FALSE; 1697 1698 B->insertmode = NOT_SET_VALUES; 1699 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 1700 ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr); 1701 1702 /* build local table of row and column ownerships */ 1703 ierr = PetscMalloc((b->size+2)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr); 1704 1705 /* build cache for off array entries formed */ 1706 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 1707 b->donotstash = PETSC_FALSE; 1708 b->colmap = PETSC_NULL; 1709 b->garray = PETSC_NULL; 1710 b->roworiented = PETSC_TRUE; 1711 1712 /* stuff used in block assembly */ 1713 b->barray = 0; 1714 1715 /* stuff used for matrix vector multiply */ 1716 b->lvec = 0; 1717 b->Mvctx = 0; 1718 b->slvec0 = 0; 1719 b->slvec0b = 0; 1720 b->slvec1 = 0; 1721 b->slvec1a = 0; 1722 b->slvec1b = 0; 1723 b->sMvctx = 0; 1724 1725 /* stuff for MatGetRow() */ 1726 b->rowindices = 0; 1727 b->rowvalues = 0; 1728 b->getrowactive = PETSC_FALSE; 1729 1730 /* hash table stuff */ 1731 b->ht = 0; 1732 b->hd = 0; 1733 b->ht_size = 0; 1734 b->ht_flag = PETSC_FALSE; 1735 b->ht_fact = 0; 1736 b->ht_total_ct = 0; 1737 b->ht_insert_ct = 0; 1738 1739 b->in_loc = 0; 1740 b->v_loc = 0; 1741 b->n_loc = 0; 1742 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr); 1743 ierr = PetscOptionsTruth("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 1744 if (flg) { 1745 PetscReal fact = 1.39; 1746 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 1747 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr); 1748 if (fact <= 1.0) fact = 1.39; 1749 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 1750 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 1751 } 1752 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1753 1754 #if defined(PETSC_HAVE_PASTIX) 1755 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C", 1756 "MatGetFactor_mpisbaij_pastix", 1757 MatGetFactor_mpisbaij_pastix);CHKERRQ(ierr); 1758 #endif 1759 #if defined(PETSC_HAVE_MUMPS) 1760 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", 1761 "MatGetFactor_mpisbaij_mumps", 1762 MatGetFactor_mpisbaij_mumps);CHKERRQ(ierr); 1763 #endif 1764 #if defined(PETSC_HAVE_SPOOLES) 1765 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C", 1766 "MatGetFactor_mpisbaij_spooles", 1767 MatGetFactor_mpisbaij_spooles);CHKERRQ(ierr); 1768 #endif 1769 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1770 "MatStoreValues_MPISBAIJ", 1771 MatStoreValues_MPISBAIJ);CHKERRQ(ierr); 1772 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1773 "MatRetrieveValues_MPISBAIJ", 1774 MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr); 1775 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 1776 "MatGetDiagonalBlock_MPISBAIJ", 1777 MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr); 1778 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C", 1779 "MatMPISBAIJSetPreallocation_MPISBAIJ", 1780 MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr); 1781 B->symmetric = PETSC_TRUE; 1782 B->structurally_symmetric = PETSC_TRUE; 1783 B->symmetric_set = PETSC_TRUE; 1784 B->structurally_symmetric_set = PETSC_TRUE; 1785 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr); 1786 PetscFunctionReturn(0); 1787 } 1788 EXTERN_C_END 1789 1790 /*MC 1791 MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices. 1792 1793 This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator, 1794 and MATMPISBAIJ otherwise. 1795 1796 Options Database Keys: 1797 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions() 1798 1799 Level: beginner 1800 1801 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ 1802 M*/ 1803 1804 EXTERN_C_BEGIN 1805 #undef __FUNCT__ 1806 #define __FUNCT__ "MatCreate_SBAIJ" 1807 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SBAIJ(Mat A) 1808 { 1809 PetscErrorCode ierr; 1810 PetscMPIInt size; 1811 1812 PetscFunctionBegin; 1813 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 1814 if (size == 1) { 1815 ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr); 1816 } else { 1817 ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 1818 } 1819 PetscFunctionReturn(0); 1820 } 1821 EXTERN_C_END 1822 1823 #undef __FUNCT__ 1824 #define __FUNCT__ "MatMPISBAIJSetPreallocation" 1825 /*@C 1826 MatMPISBAIJSetPreallocation - For good matrix assembly performance 1827 the user should preallocate the matrix storage by setting the parameters 1828 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1829 performance can be increased by more than a factor of 50. 1830 1831 Collective on Mat 1832 1833 Input Parameters: 1834 + A - the matrix 1835 . bs - size of blockk 1836 . d_nz - number of block nonzeros per block row in diagonal portion of local 1837 submatrix (same for all local rows) 1838 . d_nnz - array containing the number of block nonzeros in the various block rows 1839 in the upper triangular and diagonal part of the in diagonal portion of the local 1840 (possibly different for each block row) or PETSC_NULL. You must leave room 1841 for the diagonal entry even if it is zero. 1842 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1843 submatrix (same for all local rows). 1844 - o_nnz - array containing the number of nonzeros in the various block rows of the 1845 off-diagonal portion of the local submatrix (possibly different for 1846 each block row) or PETSC_NULL. 1847 1848 1849 Options Database Keys: 1850 . -mat_no_unroll - uses code that does not unroll the loops in the 1851 block calculations (much slower) 1852 . -mat_block_size - size of the blocks to use 1853 1854 Notes: 1855 1856 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1857 than it must be used on all processors that share the object for that argument. 1858 1859 If the *_nnz parameter is given then the *_nz parameter is ignored 1860 1861 Storage Information: 1862 For a square global matrix we define each processor's diagonal portion 1863 to be its local rows and the corresponding columns (a square submatrix); 1864 each processor's off-diagonal portion encompasses the remainder of the 1865 local matrix (a rectangular submatrix). 1866 1867 The user can specify preallocated storage for the diagonal part of 1868 the local submatrix with either d_nz or d_nnz (not both). Set 1869 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1870 memory allocation. Likewise, specify preallocated storage for the 1871 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1872 1873 You can call MatGetInfo() to get information on how effective the preallocation was; 1874 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 1875 You can also run with the option -info and look for messages with the string 1876 malloc in them to see if additional memory allocation was needed. 1877 1878 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1879 the figure below we depict these three local rows and all columns (0-11). 1880 1881 .vb 1882 0 1 2 3 4 5 6 7 8 9 10 11 1883 ------------------- 1884 row 3 | o o o d d d o o o o o o 1885 row 4 | o o o d d d o o o o o o 1886 row 5 | o o o d d d o o o o o o 1887 ------------------- 1888 .ve 1889 1890 Thus, any entries in the d locations are stored in the d (diagonal) 1891 submatrix, and any entries in the o locations are stored in the 1892 o (off-diagonal) submatrix. Note that the d matrix is stored in 1893 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 1894 1895 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 1896 plus the diagonal part of the d matrix, 1897 and o_nz should indicate the number of block nonzeros per row in the o matrix. 1898 In general, for PDE problems in which most nonzeros are near the diagonal, 1899 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1900 or you will get TERRIBLE performance; see the users' manual chapter on 1901 matrices. 1902 1903 Level: intermediate 1904 1905 .keywords: matrix, block, aij, compressed row, sparse, parallel 1906 1907 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1908 @*/ 1909 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 1910 { 1911 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 1912 1913 PetscFunctionBegin; 1914 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 1915 if (f) { 1916 ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 1917 } 1918 PetscFunctionReturn(0); 1919 } 1920 1921 #undef __FUNCT__ 1922 #define __FUNCT__ "MatCreateMPISBAIJ" 1923 /*@C 1924 MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format 1925 (block compressed row). For good matrix assembly performance 1926 the user should preallocate the matrix storage by setting the parameters 1927 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1928 performance can be increased by more than a factor of 50. 1929 1930 Collective on MPI_Comm 1931 1932 Input Parameters: 1933 + comm - MPI communicator 1934 . bs - size of blockk 1935 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1936 This value should be the same as the local size used in creating the 1937 y vector for the matrix-vector product y = Ax. 1938 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1939 This value should be the same as the local size used in creating the 1940 x vector for the matrix-vector product y = Ax. 1941 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 1942 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 1943 . d_nz - number of block nonzeros per block row in diagonal portion of local 1944 submatrix (same for all local rows) 1945 . d_nnz - array containing the number of block nonzeros in the various block rows 1946 in the upper triangular portion of the in diagonal portion of the local 1947 (possibly different for each block block row) or PETSC_NULL. 1948 You must leave room for the diagonal entry even if it is zero. 1949 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1950 submatrix (same for all local rows). 1951 - o_nnz - array containing the number of nonzeros in the various block rows of the 1952 off-diagonal portion of the local submatrix (possibly different for 1953 each block row) or PETSC_NULL. 1954 1955 Output Parameter: 1956 . A - the matrix 1957 1958 Options Database Keys: 1959 . -mat_no_unroll - uses code that does not unroll the loops in the 1960 block calculations (much slower) 1961 . -mat_block_size - size of the blocks to use 1962 . -mat_mpi - use the parallel matrix data structures even on one processor 1963 (defaults to using SeqBAIJ format on one processor) 1964 1965 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 1966 MatXXXXSetPreallocation() paradgm instead of this routine directly. 1967 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 1968 1969 Notes: 1970 The number of rows and columns must be divisible by blocksize. 1971 This matrix type does not support complex Hermitian operation. 1972 1973 The user MUST specify either the local or global matrix dimensions 1974 (possibly both). 1975 1976 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1977 than it must be used on all processors that share the object for that argument. 1978 1979 If the *_nnz parameter is given then the *_nz parameter is ignored 1980 1981 Storage Information: 1982 For a square global matrix we define each processor's diagonal portion 1983 to be its local rows and the corresponding columns (a square submatrix); 1984 each processor's off-diagonal portion encompasses the remainder of the 1985 local matrix (a rectangular submatrix). 1986 1987 The user can specify preallocated storage for the diagonal part of 1988 the local submatrix with either d_nz or d_nnz (not both). Set 1989 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1990 memory allocation. Likewise, specify preallocated storage for the 1991 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1992 1993 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1994 the figure below we depict these three local rows and all columns (0-11). 1995 1996 .vb 1997 0 1 2 3 4 5 6 7 8 9 10 11 1998 ------------------- 1999 row 3 | o o o d d d o o o o o o 2000 row 4 | o o o d d d o o o o o o 2001 row 5 | o o o d d d o o o o o o 2002 ------------------- 2003 .ve 2004 2005 Thus, any entries in the d locations are stored in the d (diagonal) 2006 submatrix, and any entries in the o locations are stored in the 2007 o (off-diagonal) submatrix. Note that the d matrix is stored in 2008 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 2009 2010 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 2011 plus the diagonal part of the d matrix, 2012 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2013 In general, for PDE problems in which most nonzeros are near the diagonal, 2014 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2015 or you will get TERRIBLE performance; see the users' manual chapter on 2016 matrices. 2017 2018 Level: intermediate 2019 2020 .keywords: matrix, block, aij, compressed row, sparse, parallel 2021 2022 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2023 @*/ 2024 2025 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPISBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2026 { 2027 PetscErrorCode ierr; 2028 PetscMPIInt size; 2029 2030 PetscFunctionBegin; 2031 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2032 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2033 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2034 if (size > 1) { 2035 ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr); 2036 ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2037 } else { 2038 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 2039 ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2040 } 2041 PetscFunctionReturn(0); 2042 } 2043 2044 2045 #undef __FUNCT__ 2046 #define __FUNCT__ "MatDuplicate_MPISBAIJ" 2047 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2048 { 2049 Mat mat; 2050 Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data; 2051 PetscErrorCode ierr; 2052 PetscInt len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs; 2053 PetscScalar *array; 2054 2055 PetscFunctionBegin; 2056 *newmat = 0; 2057 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 2058 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2059 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2060 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2061 ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2062 ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2063 2064 mat->factor = matin->factor; 2065 mat->preallocated = PETSC_TRUE; 2066 mat->assembled = PETSC_TRUE; 2067 mat->insertmode = NOT_SET_VALUES; 2068 2069 a = (Mat_MPISBAIJ*)mat->data; 2070 a->bs2 = oldmat->bs2; 2071 a->mbs = oldmat->mbs; 2072 a->nbs = oldmat->nbs; 2073 a->Mbs = oldmat->Mbs; 2074 a->Nbs = oldmat->Nbs; 2075 2076 2077 a->size = oldmat->size; 2078 a->rank = oldmat->rank; 2079 a->donotstash = oldmat->donotstash; 2080 a->roworiented = oldmat->roworiented; 2081 a->rowindices = 0; 2082 a->rowvalues = 0; 2083 a->getrowactive = PETSC_FALSE; 2084 a->barray = 0; 2085 a->rstartbs = oldmat->rstartbs; 2086 a->rendbs = oldmat->rendbs; 2087 a->cstartbs = oldmat->cstartbs; 2088 a->cendbs = oldmat->cendbs; 2089 2090 /* hash table stuff */ 2091 a->ht = 0; 2092 a->hd = 0; 2093 a->ht_size = 0; 2094 a->ht_flag = oldmat->ht_flag; 2095 a->ht_fact = oldmat->ht_fact; 2096 a->ht_total_ct = 0; 2097 a->ht_insert_ct = 0; 2098 2099 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr); 2100 if (oldmat->colmap) { 2101 #if defined (PETSC_USE_CTABLE) 2102 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2103 #else 2104 ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2105 ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2106 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2107 #endif 2108 } else a->colmap = 0; 2109 2110 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2111 ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2112 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2113 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 2114 } else a->garray = 0; 2115 2116 ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 2117 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2118 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2119 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2120 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2121 2122 ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr); 2123 ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr); 2124 ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr); 2125 ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr); 2126 2127 ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr); 2128 ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr); 2129 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr); 2130 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr); 2131 ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr); 2132 ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr); 2133 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr); 2134 ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr); 2135 ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr); 2136 ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr); 2137 ierr = PetscLogObjectParent(mat,a->slvec0b);CHKERRQ(ierr); 2138 ierr = PetscLogObjectParent(mat,a->slvec1a);CHKERRQ(ierr); 2139 ierr = PetscLogObjectParent(mat,a->slvec1b);CHKERRQ(ierr); 2140 2141 /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */ 2142 ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr); 2143 a->sMvctx = oldmat->sMvctx; 2144 ierr = PetscLogObjectParent(mat,a->sMvctx);CHKERRQ(ierr); 2145 2146 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2147 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2148 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2149 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2150 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2151 *newmat = mat; 2152 PetscFunctionReturn(0); 2153 } 2154 2155 #undef __FUNCT__ 2156 #define __FUNCT__ "MatLoad_MPISBAIJ" 2157 PetscErrorCode MatLoad_MPISBAIJ(PetscViewer viewer, const MatType type,Mat *newmat) 2158 { 2159 Mat A; 2160 PetscErrorCode ierr; 2161 PetscInt i,nz,j,rstart,rend; 2162 PetscScalar *vals,*buf; 2163 MPI_Comm comm = ((PetscObject)viewer)->comm; 2164 MPI_Status status; 2165 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,*locrowlens,mmbs; 2166 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2167 PetscInt *procsnz = 0,jj,*mycols,*ibuf; 2168 PetscInt bs=1,Mbs,mbs,extra_rows; 2169 PetscInt *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2170 PetscInt dcount,kmax,k,nzcount,tmp; 2171 int fd; 2172 2173 PetscFunctionBegin; 2174 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr); 2175 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 2176 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2177 2178 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2179 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2180 if (!rank) { 2181 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2182 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2183 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2184 if (header[3] < 0) { 2185 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ"); 2186 } 2187 } 2188 2189 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2190 M = header[1]; N = header[2]; 2191 2192 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2193 2194 /* 2195 This code adds extra rows to make sure the number of rows is 2196 divisible by the blocksize 2197 */ 2198 Mbs = M/bs; 2199 extra_rows = bs - M + bs*(Mbs); 2200 if (extra_rows == bs) extra_rows = 0; 2201 else Mbs++; 2202 if (extra_rows &&!rank) { 2203 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 2204 } 2205 2206 /* determine ownership of all rows */ 2207 mbs = Mbs/size + ((Mbs % size) > rank); 2208 m = mbs*bs; 2209 ierr = PetscMalloc2(size+1,PetscMPIInt,&rowners,size+1,PetscMPIInt,&browners);CHKERRQ(ierr); 2210 mmbs = PetscMPIIntCast(mbs); 2211 ierr = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2212 rowners[0] = 0; 2213 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2214 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2215 rstart = rowners[rank]; 2216 rend = rowners[rank+1]; 2217 2218 /* distribute row lengths to all processors */ 2219 ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscMPIInt),&locrowlens);CHKERRQ(ierr); 2220 if (!rank) { 2221 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2222 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2223 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2224 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr); 2225 for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2226 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2227 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2228 } else { 2229 ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2230 } 2231 2232 if (!rank) { /* procs[0] */ 2233 /* calculate the number of nonzeros on each processor */ 2234 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2235 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2236 for (i=0; i<size; i++) { 2237 for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 2238 procsnz[i] += rowlengths[j]; 2239 } 2240 } 2241 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2242 2243 /* determine max buffer needed and allocate it */ 2244 maxnz = 0; 2245 for (i=0; i<size; i++) { 2246 maxnz = PetscMax(maxnz,procsnz[i]); 2247 } 2248 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2249 2250 /* read in my part of the matrix column indices */ 2251 nz = procsnz[0]; 2252 ierr = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 2253 mycols = ibuf; 2254 if (size == 1) nz -= extra_rows; 2255 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2256 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2257 2258 /* read in every ones (except the last) and ship off */ 2259 for (i=1; i<size-1; i++) { 2260 nz = procsnz[i]; 2261 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2262 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2263 } 2264 /* read in the stuff for the last proc */ 2265 if (size != 1) { 2266 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2267 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2268 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2269 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 2270 } 2271 ierr = PetscFree(cols);CHKERRQ(ierr); 2272 } else { /* procs[i], i>0 */ 2273 /* determine buffer space needed for message */ 2274 nz = 0; 2275 for (i=0; i<m; i++) { 2276 nz += locrowlens[i]; 2277 } 2278 ierr = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 2279 mycols = ibuf; 2280 /* receive message of column indices*/ 2281 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2282 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2283 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2284 } 2285 2286 /* loop over local rows, determining number of off diagonal entries */ 2287 ierr = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr); 2288 ierr = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr); 2289 ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2290 ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2291 ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2292 rowcount = 0; 2293 nzcount = 0; 2294 for (i=0; i<mbs; i++) { 2295 dcount = 0; 2296 odcount = 0; 2297 for (j=0; j<bs; j++) { 2298 kmax = locrowlens[rowcount]; 2299 for (k=0; k<kmax; k++) { 2300 tmp = mycols[nzcount++]/bs; /* block col. index */ 2301 if (!mask[tmp]) { 2302 mask[tmp] = 1; 2303 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */ 2304 else masked1[dcount++] = tmp; /* entry in diag portion */ 2305 } 2306 } 2307 rowcount++; 2308 } 2309 2310 dlens[i] = dcount; /* d_nzz[i] */ 2311 odlens[i] = odcount; /* o_nzz[i] */ 2312 2313 /* zero out the mask elements we set */ 2314 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2315 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2316 } 2317 2318 /* create our matrix */ 2319 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 2320 ierr = MatSetSizes(A,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2321 ierr = MatSetType(A,type);CHKERRQ(ierr); 2322 ierr = MatSetOption(A,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 2323 ierr = MatMPISBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr); 2324 2325 if (!rank) { 2326 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2327 /* read in my part of the matrix numerical values */ 2328 nz = procsnz[0]; 2329 vals = buf; 2330 mycols = ibuf; 2331 if (size == 1) nz -= extra_rows; 2332 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2333 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2334 2335 /* insert into matrix */ 2336 jj = rstart*bs; 2337 for (i=0; i<m; i++) { 2338 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2339 mycols += locrowlens[i]; 2340 vals += locrowlens[i]; 2341 jj++; 2342 } 2343 2344 /* read in other processors (except the last one) and ship out */ 2345 for (i=1; i<size-1; i++) { 2346 nz = procsnz[i]; 2347 vals = buf; 2348 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2349 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr); 2350 } 2351 /* the last proc */ 2352 if (size != 1){ 2353 nz = procsnz[i] - extra_rows; 2354 vals = buf; 2355 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2356 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2357 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)A)->tag,comm);CHKERRQ(ierr); 2358 } 2359 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2360 2361 } else { 2362 /* receive numeric values */ 2363 ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2364 2365 /* receive message of values*/ 2366 vals = buf; 2367 mycols = ibuf; 2368 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr); 2369 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2370 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2371 2372 /* insert into matrix */ 2373 jj = rstart*bs; 2374 for (i=0; i<m; i++) { 2375 ierr = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2376 mycols += locrowlens[i]; 2377 vals += locrowlens[i]; 2378 jj++; 2379 } 2380 } 2381 2382 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2383 ierr = PetscFree(buf);CHKERRQ(ierr); 2384 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2385 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 2386 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 2387 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 2388 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2389 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2390 *newmat = A; 2391 PetscFunctionReturn(0); 2392 } 2393 2394 #undef __FUNCT__ 2395 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor" 2396 /*XXXXX@ 2397 MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2398 2399 Input Parameters: 2400 . mat - the matrix 2401 . fact - factor 2402 2403 Collective on Mat 2404 2405 Level: advanced 2406 2407 Notes: 2408 This can also be set by the command line option: -mat_use_hash_table fact 2409 2410 .keywords: matrix, hashtable, factor, HT 2411 2412 .seealso: MatSetOption() 2413 @XXXXX*/ 2414 2415 2416 #undef __FUNCT__ 2417 #define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ" 2418 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[]) 2419 { 2420 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 2421 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(a->B)->data; 2422 PetscReal atmp; 2423 PetscReal *work,*svalues,*rvalues; 2424 PetscErrorCode ierr; 2425 PetscInt i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol; 2426 PetscMPIInt rank,size; 2427 PetscInt *rowners_bs,dest,count,source; 2428 PetscScalar *va; 2429 MatScalar *ba; 2430 MPI_Status stat; 2431 2432 PetscFunctionBegin; 2433 if (idx) SETERRQ(PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov"); 2434 ierr = MatGetRowMaxAbs(a->A,v,PETSC_NULL);CHKERRQ(ierr); 2435 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2436 2437 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 2438 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 2439 2440 bs = A->rmap->bs; 2441 mbs = a->mbs; 2442 Mbs = a->Mbs; 2443 ba = b->a; 2444 bi = b->i; 2445 bj = b->j; 2446 2447 /* find ownerships */ 2448 rowners_bs = A->rmap->range; 2449 2450 /* each proc creates an array to be distributed */ 2451 ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr); 2452 ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr); 2453 2454 /* row_max for B */ 2455 if (rank != size-1){ 2456 for (i=0; i<mbs; i++) { 2457 ncols = bi[1] - bi[0]; bi++; 2458 brow = bs*i; 2459 for (j=0; j<ncols; j++){ 2460 bcol = bs*(*bj); 2461 for (kcol=0; kcol<bs; kcol++){ 2462 col = bcol + kcol; /* local col index */ 2463 col += rowners_bs[rank+1]; /* global col index */ 2464 for (krow=0; krow<bs; krow++){ 2465 atmp = PetscAbsScalar(*ba); ba++; 2466 row = brow + krow; /* local row index */ 2467 if (PetscRealPart(va[row]) < atmp) va[row] = atmp; 2468 if (work[col] < atmp) work[col] = atmp; 2469 } 2470 } 2471 bj++; 2472 } 2473 } 2474 2475 /* send values to its owners */ 2476 for (dest=rank+1; dest<size; dest++){ 2477 svalues = work + rowners_bs[dest]; 2478 count = rowners_bs[dest+1]-rowners_bs[dest]; 2479 ierr = MPI_Send(svalues,count,MPIU_REAL,dest,rank,((PetscObject)A)->comm);CHKERRQ(ierr); 2480 } 2481 } 2482 2483 /* receive values */ 2484 if (rank){ 2485 rvalues = work; 2486 count = rowners_bs[rank+1]-rowners_bs[rank]; 2487 for (source=0; source<rank; source++){ 2488 ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,((PetscObject)A)->comm,&stat);CHKERRQ(ierr); 2489 /* process values */ 2490 for (i=0; i<count; i++){ 2491 if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i]; 2492 } 2493 } 2494 } 2495 2496 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2497 ierr = PetscFree(work);CHKERRQ(ierr); 2498 PetscFunctionReturn(0); 2499 } 2500 2501 #undef __FUNCT__ 2502 #define __FUNCT__ "MatSOR_MPISBAIJ" 2503 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2504 { 2505 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2506 PetscErrorCode ierr; 2507 PetscInt mbs=mat->mbs,bs=matin->rmap->bs; 2508 PetscScalar *x,*b,*ptr,*from; 2509 Vec bb1; 2510 2511 PetscFunctionBegin; 2512 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 2513 if (bs > 1) SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2514 2515 if (flag == SOR_APPLY_UPPER) { 2516 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2517 PetscFunctionReturn(0); 2518 } 2519 2520 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2521 if ( flag & SOR_ZERO_INITIAL_GUESS ) { 2522 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2523 its--; 2524 } 2525 2526 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2527 while (its--){ 2528 2529 /* lower triangular part: slvec0b = - B^T*xx */ 2530 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2531 2532 /* copy xx into slvec0a */ 2533 ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2534 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2535 ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2536 ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2537 2538 ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr); 2539 2540 /* copy bb into slvec1a */ 2541 ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2542 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 2543 ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2544 ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2545 2546 /* set slvec1b = 0 */ 2547 ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr); 2548 2549 ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2550 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2551 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 2552 ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2553 2554 /* upper triangular part: bb1 = bb1 - B*x */ 2555 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr); 2556 2557 /* local diagonal sweep */ 2558 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2559 } 2560 ierr = VecDestroy(bb1);CHKERRQ(ierr); 2561 } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)){ 2562 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2563 } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)){ 2564 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2565 } else if (flag & SOR_EISENSTAT) { 2566 Vec xx1; 2567 PetscTruth hasop; 2568 const PetscScalar *diag; 2569 PetscScalar *sl,scale = (omega - 2.0)/omega; 2570 PetscInt i,n; 2571 2572 if (!mat->xx1) { 2573 ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr); 2574 ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr); 2575 } 2576 xx1 = mat->xx1; 2577 bb1 = mat->bb1; 2578 2579 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 2580 2581 if (!mat->diag) { 2582 /* this is wrong for same matrix with new nonzero values */ 2583 ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr); 2584 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 2585 } 2586 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 2587 2588 if (hasop) { 2589 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 2590 ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr); 2591 } else { 2592 /* 2593 These two lines are replaced by code that may be a bit faster for a good compiler 2594 ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr); 2595 ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr); 2596 */ 2597 ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr); 2598 ierr = VecGetArray(mat->diag,(PetscScalar**)&diag);CHKERRQ(ierr); 2599 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 2600 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2601 ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr); 2602 if (omega == 1.0) { 2603 for (i=0; i<n; i++) { 2604 sl[i] = b[i] - diag[i]*x[i]; 2605 } 2606 ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr); 2607 } else { 2608 for (i=0; i<n; i++) { 2609 sl[i] = b[i] + scale*diag[i]*x[i]; 2610 } 2611 ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr); 2612 } 2613 ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr); 2614 ierr = VecRestoreArray(mat->diag,(PetscScalar**)&diag);CHKERRQ(ierr); 2615 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 2616 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2617 } 2618 2619 /* multiply off-diagonal portion of matrix */ 2620 ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr); 2621 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2622 ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr); 2623 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2624 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2625 ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr); 2626 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2627 ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2628 ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2629 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr); 2630 2631 /* local sweep */ 2632 ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 2633 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 2634 } else { 2635 SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2636 } 2637 PetscFunctionReturn(0); 2638 } 2639 2640 #undef __FUNCT__ 2641 #define __FUNCT__ "MatSOR_MPISBAIJ_2comm" 2642 PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2643 { 2644 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2645 PetscErrorCode ierr; 2646 Vec lvec1,bb1; 2647 2648 PetscFunctionBegin; 2649 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 2650 if (matin->rmap->bs > 1) SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2651 2652 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2653 if ( flag & SOR_ZERO_INITIAL_GUESS ) { 2654 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2655 its--; 2656 } 2657 2658 ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr); 2659 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2660 while (its--){ 2661 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2662 2663 /* lower diagonal part: bb1 = bb - B^T*xx */ 2664 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr); 2665 ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr); 2666 2667 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2668 ierr = VecCopy(bb,bb1);CHKERRQ(ierr); 2669 ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2670 2671 /* upper diagonal part: bb1 = bb1 - B*x */ 2672 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2673 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 2674 2675 ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2676 2677 /* diagonal sweep */ 2678 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2679 } 2680 ierr = VecDestroy(lvec1);CHKERRQ(ierr); 2681 ierr = VecDestroy(bb1);CHKERRQ(ierr); 2682 } else { 2683 SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2684 } 2685 PetscFunctionReturn(0); 2686 } 2687 2688