1 #define PETSCMAT_DLL 2 3 #include "../src/mat/impls/baij/mpi/mpibaij.h" /*I "petscmat.h" I*/ 4 #include "../src/mat/impls/sbaij/mpi/mpisbaij.h" 5 #include "../src/mat/impls/sbaij/seq/sbaij.h" 6 #include "petscblaslapack.h" 7 8 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat); 9 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat); 10 EXTERN PetscErrorCode DisAssemble_MPISBAIJ(Mat); 11 EXTERN PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt); 12 EXTERN PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []); 13 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []); 14 EXTERN PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode); 15 EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 16 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 17 EXTERN PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**); 18 EXTERN PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**); 19 EXTERN PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*); 20 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *); 21 EXTERN PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat,Vec,PetscInt[]); 22 EXTERN PetscErrorCode MatSOR_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec); 23 24 EXTERN_C_BEGIN 25 #undef __FUNCT__ 26 #define __FUNCT__ "MatStoreValues_MPISBAIJ" 27 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPISBAIJ(Mat mat) 28 { 29 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 30 PetscErrorCode ierr; 31 32 PetscFunctionBegin; 33 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 34 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 35 PetscFunctionReturn(0); 36 } 37 EXTERN_C_END 38 39 EXTERN_C_BEGIN 40 #undef __FUNCT__ 41 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ" 42 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPISBAIJ(Mat mat) 43 { 44 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 45 PetscErrorCode ierr; 46 47 PetscFunctionBegin; 48 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 49 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 50 PetscFunctionReturn(0); 51 } 52 EXTERN_C_END 53 54 55 #define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \ 56 { \ 57 \ 58 brow = row/bs; \ 59 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 60 rmax = aimax[brow]; nrow = ailen[brow]; \ 61 bcol = col/bs; \ 62 ridx = row % bs; cidx = col % bs; \ 63 low = 0; high = nrow; \ 64 while (high-low > 3) { \ 65 t = (low+high)/2; \ 66 if (rp[t] > bcol) high = t; \ 67 else low = t; \ 68 } \ 69 for (_i=low; _i<high; _i++) { \ 70 if (rp[_i] > bcol) break; \ 71 if (rp[_i] == bcol) { \ 72 bap = ap + bs2*_i + bs*cidx + ridx; \ 73 if (addv == ADD_VALUES) *bap += value; \ 74 else *bap = value; \ 75 goto a_noinsert; \ 76 } \ 77 } \ 78 if (a->nonew == 1) goto a_noinsert; \ 79 if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 80 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 81 N = nrow++ - 1; \ 82 /* shift up all the later entries in this row */ \ 83 for (ii=N; ii>=_i; ii--) { \ 84 rp[ii+1] = rp[ii]; \ 85 ierr = 
PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 86 } \ 87 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 88 rp[_i] = bcol; \ 89 ap[bs2*_i + bs*cidx + ridx] = value; \ 90 a_noinsert:; \ 91 ailen[brow] = nrow; \ 92 } 93 94 #define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \ 95 { \ 96 brow = row/bs; \ 97 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 98 rmax = bimax[brow]; nrow = bilen[brow]; \ 99 bcol = col/bs; \ 100 ridx = row % bs; cidx = col % bs; \ 101 low = 0; high = nrow; \ 102 while (high-low > 3) { \ 103 t = (low+high)/2; \ 104 if (rp[t] > bcol) high = t; \ 105 else low = t; \ 106 } \ 107 for (_i=low; _i<high; _i++) { \ 108 if (rp[_i] > bcol) break; \ 109 if (rp[_i] == bcol) { \ 110 bap = ap + bs2*_i + bs*cidx + ridx; \ 111 if (addv == ADD_VALUES) *bap += value; \ 112 else *bap = value; \ 113 goto b_noinsert; \ 114 } \ 115 } \ 116 if (b->nonew == 1) goto b_noinsert; \ 117 if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 118 MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 119 N = nrow++ - 1; \ 120 /* shift up all the later entries in this row */ \ 121 for (ii=N; ii>=_i; ii--) { \ 122 rp[ii+1] = rp[ii]; \ 123 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 124 } \ 125 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 126 rp[_i] = bcol; \ 127 ap[bs2*_i + bs*cidx + ridx] = value; \ 128 b_noinsert:; \ 129 bilen[brow] = nrow; \ 130 } 131 132 /* Only add/insert a(i,j) with i<=j (blocks). 133 Any a(i,j) with i>j input by user is ingored. 134 */ 135 #undef __FUNCT__ 136 #define __FUNCT__ "MatSetValues_MPISBAIJ" 137 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 138 { 139 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 140 MatScalar value; 141 PetscTruth roworiented = baij->roworiented; 142 PetscErrorCode ierr; 143 PetscInt i,j,row,col; 144 PetscInt rstart_orig=mat->rmap->rstart; 145 PetscInt rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart; 146 PetscInt cend_orig=mat->cmap->rend,bs=mat->rmap->bs; 147 148 /* Some Variables required in the macro */ 149 Mat A = baij->A; 150 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data; 151 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 152 MatScalar *aa=a->a; 153 154 Mat B = baij->B; 155 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 156 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 157 MatScalar *ba=b->a; 158 159 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 160 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 161 MatScalar *ap,*bap; 162 163 /* for stash */ 164 PetscInt n_loc, *in_loc = PETSC_NULL; 165 MatScalar *v_loc = PETSC_NULL; 166 167 PetscFunctionBegin; 168 if (v) PetscValidScalarPointer(v,6); 169 if (!baij->donotstash){ 170 if (n > baij->n_loc) { 171 ierr = PetscFree(baij->in_loc);CHKERRQ(ierr); 172 ierr = PetscFree(baij->v_loc);CHKERRQ(ierr); 173 ierr = PetscMalloc(n*sizeof(PetscInt),&baij->in_loc);CHKERRQ(ierr); 174 ierr = PetscMalloc(n*sizeof(MatScalar),&baij->v_loc);CHKERRQ(ierr); 175 baij->n_loc = n; 176 } 177 in_loc = baij->in_loc; 178 v_loc = baij->v_loc; 179 } 180 181 for (i=0; i<m; i++) { 182 if (im[i] < 0) continue; 183 #if defined(PETSC_USE_DEBUG) 184 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max 
%D",im[i],mat->rmap->N-1); 185 #endif 186 if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */ 187 row = im[i] - rstart_orig; /* local row index */ 188 for (j=0; j<n; j++) { 189 if (im[i]/bs > in[j]/bs){ 190 if (a->ignore_ltriangular){ 191 continue; /* ignore lower triangular blocks */ 192 } else { 193 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 194 } 195 } 196 if (in[j] >= cstart_orig && in[j] < cend_orig){ /* diag entry (A) */ 197 col = in[j] - cstart_orig; /* local col index */ 198 brow = row/bs; bcol = col/bs; 199 if (brow > bcol) continue; /* ignore lower triangular blocks of A */ 200 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 201 MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv); 202 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 203 } else if (in[j] < 0) continue; 204 #if defined(PETSC_USE_DEBUG) 205 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 206 #endif 207 else { /* off-diag entry (B) */ 208 if (mat->was_assembled) { 209 if (!baij->colmap) { 210 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 211 } 212 #if defined (PETSC_USE_CTABLE) 213 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 214 col = col - 1; 215 #else 216 col = baij->colmap[in[j]/bs] - 1; 217 #endif 218 if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) { 219 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 220 col = in[j]; 221 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 222 B = baij->B; 223 b = (Mat_SeqBAIJ*)(B)->data; 224 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 225 ba=b->a; 226 } else col += in[j]%bs; 227 } else col = in[j]; 228 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 229 MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv); 230 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 231 } 232 } 233 } else { /* off processor entry */ 234 if (!baij->donotstash) { 235 n_loc = 0; 236 for (j=0; j<n; j++){ 237 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 238 in_loc[n_loc] = in[j]; 239 if (roworiented) { 240 v_loc[n_loc] = v[i*n+j]; 241 } else { 242 v_loc[n_loc] = v[j*m+i]; 243 } 244 n_loc++; 245 } 246 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr); 247 } 248 } 249 } 250 PetscFunctionReturn(0); 251 } 252 253 #undef __FUNCT__ 254 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ" 255 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 256 { 257 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 258 const MatScalar *value; 259 MatScalar *barray=baij->barray; 260 PetscTruth roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular; 261 PetscErrorCode ierr; 262 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 263 PetscInt rend=baij->rendbs,cstart=baij->rstartbs,stepval; 264 PetscInt cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2; 265 266 PetscFunctionBegin; 267 if(!barray) { 268 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 269 baij->barray = barray; 270 } 271 272 if (roworiented) { 273 stepval = (n-1)*bs; 274 } else { 275 stepval = 
(m-1)*bs; 276 } 277 for (i=0; i<m; i++) { 278 if (im[i] < 0) continue; 279 #if defined(PETSC_USE_DEBUG) 280 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 281 #endif 282 if (im[i] >= rstart && im[i] < rend) { 283 row = im[i] - rstart; 284 for (j=0; j<n; j++) { 285 if (im[i] > in[j]) { 286 if (ignore_ltriangular) continue; /* ignore lower triangular blocks */ 287 else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 288 } 289 /* If NumCol = 1 then a copy is not required */ 290 if ((roworiented) && (n == 1)) { 291 barray = (MatScalar*) v + i*bs2; 292 } else if((!roworiented) && (m == 1)) { 293 barray = (MatScalar*) v + j*bs2; 294 } else { /* Here a copy is required */ 295 if (roworiented) { 296 value = v + i*(stepval+bs)*bs + j*bs; 297 } else { 298 value = v + j*(stepval+bs)*bs + i*bs; 299 } 300 for (ii=0; ii<bs; ii++,value+=stepval) { 301 for (jj=0; jj<bs; jj++) { 302 *barray++ = *value++; 303 } 304 } 305 barray -=bs2; 306 } 307 308 if (in[j] >= cstart && in[j] < cend){ 309 col = in[j] - cstart; 310 ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 311 } 312 else if (in[j] < 0) continue; 313 #if defined(PETSC_USE_DEBUG) 314 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 315 #endif 316 else { 317 if (mat->was_assembled) { 318 if (!baij->colmap) { 319 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 320 } 321 322 #if defined(PETSC_USE_DEBUG) 323 #if defined (PETSC_USE_CTABLE) 324 { PetscInt data; 325 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 326 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 327 } 328 #else 329 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 330 #endif 331 #endif 332 #if defined (PETSC_USE_CTABLE) 333 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 334 col = (col - 1)/bs; 335 #else 336 col = (baij->colmap[in[j]] - 1)/bs; 337 #endif 338 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 339 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 340 col = in[j]; 341 } 342 } 343 else col = in[j]; 344 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 345 } 346 } 347 } else { 348 if (!baij->donotstash) { 349 if (roworiented) { 350 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 351 } else { 352 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 353 } 354 } 355 } 356 } 357 PetscFunctionReturn(0); 358 } 359 360 #undef __FUNCT__ 361 #define __FUNCT__ "MatGetValues_MPISBAIJ" 362 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 363 { 364 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 365 PetscErrorCode ierr; 366 PetscInt bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend; 367 PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data; 368 369 PetscFunctionBegin; 370 for (i=0; i<m; i++) { 371 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */ 372 if (idxm[i] >= mat->rmap->N) 
SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 373 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 374 row = idxm[i] - bsrstart; 375 for (j=0; j<n; j++) { 376 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */ 377 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 378 if (idxn[j] >= bscstart && idxn[j] < bscend){ 379 col = idxn[j] - bscstart; 380 ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 381 } else { 382 if (!baij->colmap) { 383 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 384 } 385 #if defined (PETSC_USE_CTABLE) 386 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 387 data --; 388 #else 389 data = baij->colmap[idxn[j]/bs]-1; 390 #endif 391 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 392 else { 393 col = data + idxn[j]%bs; 394 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 395 } 396 } 397 } 398 } else { 399 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 400 } 401 } 402 PetscFunctionReturn(0); 403 } 404 405 #undef __FUNCT__ 406 #define __FUNCT__ "MatNorm_MPISBAIJ" 407 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm) 408 { 409 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 410 PetscErrorCode ierr; 411 PetscReal sum[2],*lnorm2; 412 413 PetscFunctionBegin; 414 if (baij->size == 1) { 415 ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr); 416 } else { 417 if (type == NORM_FROBENIUS) { 418 ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr); 419 ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr); 420 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++; /* squar power of norm(A) */ 421 ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr); 422 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--; /* squar power of norm(B) */ 423 ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 424 *norm = sqrt(sum[0] + 2*sum[1]); 425 ierr = PetscFree(lnorm2);CHKERRQ(ierr); 426 } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */ 427 Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data; 428 Mat_SeqBAIJ *bmat=(Mat_SeqBAIJ*)baij->B->data; 429 PetscReal *rsum,*rsum2,vabs; 430 PetscInt *jj,*garray=baij->garray,rstart=baij->rstartbs,nz; 431 PetscInt brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs; 432 MatScalar *v; 433 434 ierr = PetscMalloc2(mat->cmap->N,PetscReal,&rsum,mat->cmap->N,PetscReal,&rsum2);CHKERRQ(ierr); 435 ierr = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 436 /* Amat */ 437 v = amat->a; jj = amat->j; 438 for (brow=0; brow<mbs; brow++) { 439 grow = bs*(rstart + brow); 440 nz = amat->i[brow+1] - amat->i[brow]; 441 for (bcol=0; bcol<nz; bcol++){ 442 gcol = bs*(rstart + *jj); jj++; 443 for (col=0; col<bs; col++){ 444 for (row=0; row<bs; row++){ 445 vabs = PetscAbsScalar(*v); v++; 446 rsum[gcol+col] += vabs; 447 /* non-diagonal block */ 448 if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs; 449 } 450 } 451 } 452 } 453 /* Bmat */ 454 v = bmat->a; jj = bmat->j; 455 for (brow=0; brow<mbs; brow++) { 456 grow = bs*(rstart + brow); 457 nz = bmat->i[brow+1] - bmat->i[brow]; 458 for (bcol=0; bcol<nz; bcol++){ 459 gcol = bs*garray[*jj]; jj++; 460 for (col=0; col<bs; col++){ 461 for (row=0; row<bs; row++){ 462 vabs = PetscAbsScalar(*v); v++; 463 rsum[gcol+col] += 
vabs; 464 rsum[grow+row] += vabs; 465 } 466 } 467 } 468 } 469 ierr = MPI_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 470 *norm = 0.0; 471 for (col=0; col<mat->cmap->N; col++) { 472 if (rsum2[col] > *norm) *norm = rsum2[col]; 473 } 474 ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr); 475 } else { 476 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet"); 477 } 478 } 479 PetscFunctionReturn(0); 480 } 481 482 #undef __FUNCT__ 483 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ" 484 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode) 485 { 486 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 487 PetscErrorCode ierr; 488 PetscInt nstash,reallocs; 489 InsertMode addv; 490 491 PetscFunctionBegin; 492 if (baij->donotstash) { 493 PetscFunctionReturn(0); 494 } 495 496 /* make sure all processors are either in INSERTMODE or ADDMODE */ 497 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr); 498 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 499 mat->insertmode = addv; /* in case this processor had no cache */ 500 501 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 502 ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr); 503 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 504 ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 505 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 506 ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 507 PetscFunctionReturn(0); 508 } 509 510 #undef __FUNCT__ 511 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ" 512 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode) 513 { 514 Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data; 515 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)baij->A->data; 516 PetscErrorCode ierr; 517 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 518 PetscInt *row,*col; 519 PetscTruth other_disassembled; 520 PetscMPIInt n; 521 PetscTruth r1,r2,r3; 522 MatScalar *val; 523 InsertMode addv = mat->insertmode; 524 525 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 526 PetscFunctionBegin; 527 528 if (!baij->donotstash) { 529 while (1) { 530 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 531 if (!flg) break; 532 533 for (i=0; i<n;) { 534 /* Now identify the consecutive vals belonging to the same row */ 535 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 536 if (j < n) ncols = j-i; 537 else ncols = n-i; 538 /* Now assemble all these values with a single function call */ 539 ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 540 i = j; 541 } 542 } 543 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 544 /* Now process the block-stash. 
Since the values are stashed column-oriented, 545 set the roworiented flag to column oriented, and after MatSetValues() 546 restore the original flags */ 547 r1 = baij->roworiented; 548 r2 = a->roworiented; 549 r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented; 550 baij->roworiented = PETSC_FALSE; 551 a->roworiented = PETSC_FALSE; 552 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworinted */ 553 while (1) { 554 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 555 if (!flg) break; 556 557 for (i=0; i<n;) { 558 /* Now identify the consecutive vals belonging to the same row */ 559 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 560 if (j < n) ncols = j-i; 561 else ncols = n-i; 562 ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 563 i = j; 564 } 565 } 566 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 567 baij->roworiented = r1; 568 a->roworiented = r2; 569 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworinted */ 570 } 571 572 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 573 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 574 575 /* determine if any processor has disassembled, if so we must 576 also disassemble ourselfs, in order that we may reassemble. */ 577 /* 578 if nonzero structure of submatrix B cannot change then we know that 579 no processor disassembled thus we can skip this stuff 580 */ 581 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 582 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr); 583 if (mat->was_assembled && !other_disassembled) { 584 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 585 } 586 } 587 588 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 589 ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */ 590 } 591 ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */ 592 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 593 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 594 595 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 596 baij->rowvalues = 0; 597 598 PetscFunctionReturn(0); 599 } 600 601 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 602 #undef __FUNCT__ 603 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket" 604 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 605 { 606 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 607 PetscErrorCode ierr; 608 PetscInt bs = mat->rmap->bs; 609 PetscMPIInt size = baij->size,rank = baij->rank; 610 PetscTruth iascii,isdraw; 611 PetscViewer sviewer; 612 PetscViewerFormat format; 613 614 PetscFunctionBegin; 615 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 616 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 617 if (iascii) { 618 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 619 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 620 MatInfo info; 621 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 622 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 623 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n", 624 rank,mat->rmap->N,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs, 625 
mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr); 626 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 627 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr); 628 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 629 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr); 630 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 631 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 632 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 633 PetscFunctionReturn(0); 634 } else if (format == PETSC_VIEWER_ASCII_INFO) { 635 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 636 PetscFunctionReturn(0); 637 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 638 PetscFunctionReturn(0); 639 } 640 } 641 642 if (isdraw) { 643 PetscDraw draw; 644 PetscTruth isnull; 645 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 646 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 647 } 648 649 if (size == 1) { 650 ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 651 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 652 } else { 653 /* assemble the entire matrix onto first processor. */ 654 Mat A; 655 Mat_SeqSBAIJ *Aloc; 656 Mat_SeqBAIJ *Bloc; 657 PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 658 MatScalar *a; 659 660 /* Should this be the same type as mat? */ 661 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 662 if (!rank) { 663 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 664 } else { 665 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 666 } 667 ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 668 ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 669 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 670 671 /* copy over the A part */ 672 Aloc = (Mat_SeqSBAIJ*)baij->A->data; 673 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 674 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 675 676 for (i=0; i<mbs; i++) { 677 rvals[0] = bs*(baij->rstartbs + i); 678 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 679 for (j=ai[i]; j<ai[i+1]; j++) { 680 col = (baij->cstartbs+aj[j])*bs; 681 for (k=0; k<bs; k++) { 682 ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 683 col++; a += bs; 684 } 685 } 686 } 687 /* copy over the B part */ 688 Bloc = (Mat_SeqBAIJ*)baij->B->data; 689 ai = Bloc->i; aj = Bloc->j; a = Bloc->a; 690 for (i=0; i<mbs; i++) { 691 692 rvals[0] = bs*(baij->rstartbs + i); 693 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 694 for (j=ai[i]; j<ai[i+1]; j++) { 695 col = baij->garray[aj[j]]*bs; 696 for (k=0; k<bs; k++) { 697 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 698 col++; a += bs; 699 } 700 } 701 } 702 ierr = PetscFree(rvals);CHKERRQ(ierr); 703 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 704 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 705 /* 706 Everyone has to call to draw the matrix since the graphics waits are 707 synchronized across all processors that share the PetscDraw object 708 */ 709 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 710 if (!rank) { 711 ierr = 
PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 712 ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 713 } 714 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 715 ierr = MatDestroy(A);CHKERRQ(ierr); 716 } 717 PetscFunctionReturn(0); 718 } 719 720 #undef __FUNCT__ 721 #define __FUNCT__ "MatView_MPISBAIJ" 722 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer) 723 { 724 PetscErrorCode ierr; 725 PetscTruth iascii,isdraw,issocket,isbinary; 726 727 PetscFunctionBegin; 728 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 729 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 730 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 731 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 732 if (iascii || isdraw || issocket || isbinary) { 733 ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 734 } else { 735 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name); 736 } 737 PetscFunctionReturn(0); 738 } 739 740 #undef __FUNCT__ 741 #define __FUNCT__ "MatDestroy_MPISBAIJ" 742 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat) 743 { 744 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 745 PetscErrorCode ierr; 746 747 PetscFunctionBegin; 748 #if defined(PETSC_USE_LOG) 749 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N); 750 #endif 751 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 752 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 753 ierr = MatDestroy(baij->A);CHKERRQ(ierr); 754 ierr = MatDestroy(baij->B);CHKERRQ(ierr); 755 #if defined (PETSC_USE_CTABLE) 756 if (baij->colmap) {ierr = PetscTableDestroy(baij->colmap);CHKERRQ(ierr);} 757 #else 758 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 759 #endif 760 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 761 if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 762 if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 763 if (baij->slvec0) { 764 ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr); 765 ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr); 766 } 767 if (baij->slvec1) { 768 ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr); 769 ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr); 770 ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr); 771 } 772 if (baij->sMvctx) {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);} 773 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 774 ierr = PetscFree(baij->barray);CHKERRQ(ierr); 775 ierr = PetscFree(baij->hd);CHKERRQ(ierr); 776 if (baij->diag) {ierr = VecDestroy(baij->diag);CHKERRQ(ierr);} 777 if (baij->bb1) {ierr = VecDestroy(baij->bb1);CHKERRQ(ierr);} 778 if (baij->xx1) {ierr = VecDestroy(baij->xx1);CHKERRQ(ierr);} 779 #if defined(PETSC_USE_SCALAR_MAT_SINGLE) 780 ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr); 781 #endif 782 ierr = PetscFree(baij->in_loc);CHKERRQ(ierr); 783 ierr = PetscFree(baij->v_loc);CHKERRQ(ierr); 784 ierr = PetscFree(baij->rangebs);CHKERRQ(ierr); 785 ierr = PetscFree(baij);CHKERRQ(ierr); 786 787 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 788 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 789 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 790 ierr 
= PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 791 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 792 PetscFunctionReturn(0); 793 } 794 795 #undef __FUNCT__ 796 #define __FUNCT__ "MatMult_MPISBAIJ_Hermitian" 797 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy) 798 { 799 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 800 PetscErrorCode ierr; 801 PetscInt nt,mbs=a->mbs,bs=A->rmap->bs; 802 PetscScalar *x,*from; 803 804 PetscFunctionBegin; 805 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 806 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 807 808 /* diagonal part */ 809 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 810 ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr); 811 812 /* subdiagonal part */ 813 ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 814 815 /* copy x into the vec slvec0 */ 816 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 817 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 818 819 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 820 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 821 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 822 823 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 824 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 825 /* supperdiagonal part */ 826 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 827 PetscFunctionReturn(0); 828 } 829 830 #undef __FUNCT__ 831 #define __FUNCT__ "MatMult_MPISBAIJ" 832 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy) 833 { 834 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 835 PetscErrorCode ierr; 836 PetscInt nt,mbs=a->mbs,bs=A->rmap->bs; 837 PetscScalar *x,*from; 838 839 PetscFunctionBegin; 840 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 841 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 842 843 /* diagonal part */ 844 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 845 ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr); 846 847 /* subdiagonal part */ 848 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 849 850 /* copy x into the vec slvec0 */ 851 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 852 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 853 854 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 855 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 856 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 857 858 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 859 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 860 /* supperdiagonal part */ 861 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 862 PetscFunctionReturn(0); 863 } 864 865 #undef __FUNCT__ 866 #define __FUNCT__ "MatMult_MPISBAIJ_2comm" 867 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy) 868 { 869 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 870 PetscErrorCode ierr; 871 PetscInt nt; 872 873 PetscFunctionBegin; 874 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 875 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 876 877 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 878 if (nt != A->rmap->N) 
SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 879 880 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 881 /* do diagonal part */ 882 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 883 /* do supperdiagonal part */ 884 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 885 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 886 /* do subdiagonal part */ 887 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 888 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 889 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 890 891 PetscFunctionReturn(0); 892 } 893 894 #undef __FUNCT__ 895 #define __FUNCT__ "MatMultAdd_MPISBAIJ" 896 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 897 { 898 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 899 PetscErrorCode ierr; 900 PetscInt mbs=a->mbs,bs=A->rmap->bs; 901 PetscScalar *x,*from,zero=0.0; 902 903 PetscFunctionBegin; 904 /* 905 PetscSynchronizedPrintf(((PetscObject)A)->comm," MatMultAdd is called ...\n"); 906 PetscSynchronizedFlush(((PetscObject)A)->comm); 907 */ 908 /* diagonal part */ 909 ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr); 910 ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr); 911 912 /* subdiagonal part */ 913 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 914 915 /* copy x into the vec slvec0 */ 916 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 917 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 918 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 919 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 920 921 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 922 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 923 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 924 925 /* supperdiagonal part */ 926 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr); 927 928 PetscFunctionReturn(0); 929 } 930 931 #undef __FUNCT__ 932 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm" 933 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz) 934 { 935 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 936 PetscErrorCode ierr; 937 938 PetscFunctionBegin; 939 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 940 /* do diagonal part */ 941 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 942 /* do supperdiagonal part */ 943 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 944 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 945 946 /* do subdiagonal part */ 947 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 948 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 949 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 950 951 PetscFunctionReturn(0); 952 } 953 954 /* 955 This only works correctly for square matrices where the subblock A->A is the 956 diagonal block 957 */ 958 #undef __FUNCT__ 959 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ" 960 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v) 961 { 962 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 963 PetscErrorCode ierr; 964 965 PetscFunctionBegin; 966 /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square 
matrix where A->A is diag block"); */ 967 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 968 PetscFunctionReturn(0); 969 } 970 971 #undef __FUNCT__ 972 #define __FUNCT__ "MatScale_MPISBAIJ" 973 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa) 974 { 975 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 976 PetscErrorCode ierr; 977 978 PetscFunctionBegin; 979 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 980 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 981 PetscFunctionReturn(0); 982 } 983 984 #undef __FUNCT__ 985 #define __FUNCT__ "MatGetRow_MPISBAIJ" 986 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 987 { 988 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 989 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 990 PetscErrorCode ierr; 991 PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 992 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend; 993 PetscInt *cmap,*idx_p,cstart = mat->rstartbs; 994 995 PetscFunctionBegin; 996 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 997 mat->getrowactive = PETSC_TRUE; 998 999 if (!mat->rowvalues && (idx || v)) { 1000 /* 1001 allocate enough space to hold information from the longest row. 1002 */ 1003 Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data; 1004 Mat_SeqBAIJ *Ba = (Mat_SeqBAIJ*)mat->B->data; 1005 PetscInt max = 1,mbs = mat->mbs,tmp; 1006 for (i=0; i<mbs; i++) { 1007 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */ 1008 if (max < tmp) { max = tmp; } 1009 } 1010 ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1011 } 1012 1013 if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows"); 1014 lrow = row - brstart; /* local row index */ 1015 1016 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1017 if (!v) {pvA = 0; pvB = 0;} 1018 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1019 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1020 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1021 nztot = nzA + nzB; 1022 1023 cmap = mat->garray; 1024 if (v || idx) { 1025 if (nztot) { 1026 /* Sort by increasing column numbers, assuming A and B already sorted */ 1027 PetscInt imark = -1; 1028 if (v) { 1029 *v = v_p = mat->rowvalues; 1030 for (i=0; i<nzB; i++) { 1031 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1032 else break; 1033 } 1034 imark = i; 1035 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1036 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1037 } 1038 if (idx) { 1039 *idx = idx_p = mat->rowindices; 1040 if (imark > -1) { 1041 for (i=0; i<imark; i++) { 1042 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1043 } 1044 } else { 1045 for (i=0; i<nzB; i++) { 1046 if (cmap[cworkB[i]/bs] < cstart) 1047 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1048 else break; 1049 } 1050 imark = i; 1051 } 1052 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1053 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1054 } 1055 } else { 1056 if (idx) *idx = 0; 1057 if (v) *v = 0; 1058 } 1059 } 1060 *nz = nztot; 1061 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1062 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1063 PetscFunctionReturn(0); 1064 } 1065 1066 #undef __FUNCT__ 1067 #define __FUNCT__ "MatRestoreRow_MPISBAIJ" 1068 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat 
mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1069 { 1070 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1071 1072 PetscFunctionBegin; 1073 if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1074 baij->getrowactive = PETSC_FALSE; 1075 PetscFunctionReturn(0); 1076 } 1077 1078 #undef __FUNCT__ 1079 #define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ" 1080 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A) 1081 { 1082 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1083 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1084 1085 PetscFunctionBegin; 1086 aA->getrow_utriangular = PETSC_TRUE; 1087 PetscFunctionReturn(0); 1088 } 1089 #undef __FUNCT__ 1090 #define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ" 1091 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A) 1092 { 1093 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1094 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1095 1096 PetscFunctionBegin; 1097 aA->getrow_utriangular = PETSC_FALSE; 1098 PetscFunctionReturn(0); 1099 } 1100 1101 #undef __FUNCT__ 1102 #define __FUNCT__ "MatRealPart_MPISBAIJ" 1103 PetscErrorCode MatRealPart_MPISBAIJ(Mat A) 1104 { 1105 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1106 PetscErrorCode ierr; 1107 1108 PetscFunctionBegin; 1109 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1110 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1111 PetscFunctionReturn(0); 1112 } 1113 1114 #undef __FUNCT__ 1115 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ" 1116 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A) 1117 { 1118 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1119 PetscErrorCode ierr; 1120 1121 PetscFunctionBegin; 1122 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1123 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1124 PetscFunctionReturn(0); 1125 } 1126 1127 #undef __FUNCT__ 1128 #define __FUNCT__ "MatZeroEntries_MPISBAIJ" 1129 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A) 1130 { 1131 Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data; 1132 PetscErrorCode ierr; 1133 1134 PetscFunctionBegin; 1135 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1136 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1137 PetscFunctionReturn(0); 1138 } 1139 1140 #undef __FUNCT__ 1141 #define __FUNCT__ "MatGetInfo_MPISBAIJ" 1142 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1143 { 1144 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data; 1145 Mat A = a->A,B = a->B; 1146 PetscErrorCode ierr; 1147 PetscReal isend[5],irecv[5]; 1148 1149 PetscFunctionBegin; 1150 info->block_size = (PetscReal)matin->rmap->bs; 1151 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1152 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1153 isend[3] = info->memory; isend[4] = info->mallocs; 1154 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1155 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1156 isend[3] += info->memory; isend[4] += info->mallocs; 1157 if (flag == MAT_LOCAL) { 1158 info->nz_used = isend[0]; 1159 info->nz_allocated = isend[1]; 1160 info->nz_unneeded = isend[2]; 1161 info->memory = isend[3]; 1162 info->mallocs = isend[4]; 1163 } else if (flag == MAT_GLOBAL_MAX) { 1164 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1165 info->nz_used = irecv[0]; 1166 info->nz_allocated = irecv[1]; 1167 info->nz_unneeded = irecv[2]; 1168 info->memory = irecv[3]; 1169 info->mallocs = irecv[4]; 1170 } else if (flag == MAT_GLOBAL_SUM) { 1171 ierr = 
MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1172 info->nz_used = irecv[0]; 1173 info->nz_allocated = irecv[1]; 1174 info->nz_unneeded = irecv[2]; 1175 info->memory = irecv[3]; 1176 info->mallocs = irecv[4]; 1177 } else { 1178 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1179 } 1180 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1181 info->fill_ratio_needed = 0; 1182 info->factor_mallocs = 0; 1183 PetscFunctionReturn(0); 1184 } 1185 1186 #undef __FUNCT__ 1187 #define __FUNCT__ "MatSetOption_MPISBAIJ" 1188 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscTruth flg) 1189 { 1190 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1191 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1192 PetscErrorCode ierr; 1193 1194 PetscFunctionBegin; 1195 switch (op) { 1196 case MAT_NEW_NONZERO_LOCATIONS: 1197 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1198 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1199 case MAT_KEEP_NONZERO_PATTERN: 1200 case MAT_NEW_NONZERO_LOCATION_ERR: 1201 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1202 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1203 break; 1204 case MAT_ROW_ORIENTED: 1205 a->roworiented = flg; 1206 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1207 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1208 break; 1209 case MAT_NEW_DIAGONALS: 1210 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1211 break; 1212 case MAT_IGNORE_OFF_PROC_ENTRIES: 1213 a->donotstash = flg; 1214 break; 1215 case MAT_USE_HASH_TABLE: 1216 a->ht_flag = flg; 1217 break; 1218 case MAT_HERMITIAN: 1219 if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first"); 1220 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1221 A->ops->mult = MatMult_MPISBAIJ_Hermitian; 1222 break; 1223 case MAT_SYMMETRIC: 1224 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1225 break; 1226 case MAT_STRUCTURALLY_SYMMETRIC: 1227 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1228 break; 1229 case MAT_SYMMETRY_ETERNAL: 1230 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric"); 1231 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1232 break; 1233 case MAT_IGNORE_LOWER_TRIANGULAR: 1234 aA->ignore_ltriangular = flg; 1235 break; 1236 case MAT_ERROR_LOWER_TRIANGULAR: 1237 aA->ignore_ltriangular = flg; 1238 break; 1239 case MAT_GETROW_UPPERTRIANGULAR: 1240 aA->getrow_utriangular = flg; 1241 break; 1242 default: 1243 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1244 } 1245 PetscFunctionReturn(0); 1246 } 1247 1248 #undef __FUNCT__ 1249 #define __FUNCT__ "MatTranspose_MPISBAIJ" 1250 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B) 1251 { 1252 PetscErrorCode ierr; 1253 PetscFunctionBegin; 1254 if (MAT_INITIAL_MATRIX || *B != A) { 1255 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 1256 } 1257 PetscFunctionReturn(0); 1258 } 1259 1260 #undef __FUNCT__ 1261 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ" 1262 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr) 1263 { 1264 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1265 Mat a=baij->A, b=baij->B; 1266 PetscErrorCode ierr; 1267 PetscInt nv,m,n; 1268 PetscTruth flg; 1269 1270 PetscFunctionBegin; 1271 if (ll != rr){ 1272 ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr); 1273 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n"); 1274 } 1275 if (!ll) 
PetscFunctionReturn(0); 1276 1277 ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr); 1278 if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n); 1279 1280 ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr); 1281 if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size"); 1282 1283 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1284 1285 /* left diagonalscale the off-diagonal part */ 1286 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1287 1288 /* scale the diagonal part */ 1289 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1290 1291 /* right diagonalscale the off-diagonal part */ 1292 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1293 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1294 PetscFunctionReturn(0); 1295 } 1296 1297 #undef __FUNCT__ 1298 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ" 1299 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A) 1300 { 1301 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1302 PetscErrorCode ierr; 1303 1304 PetscFunctionBegin; 1305 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1306 PetscFunctionReturn(0); 1307 } 1308 1309 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *); 1310 1311 #undef __FUNCT__ 1312 #define __FUNCT__ "MatEqual_MPISBAIJ" 1313 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag) 1314 { 1315 Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data; 1316 Mat a,b,c,d; 1317 PetscTruth flg; 1318 PetscErrorCode ierr; 1319 1320 PetscFunctionBegin; 1321 a = matA->A; b = matA->B; 1322 c = matB->A; d = matB->B; 1323 1324 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1325 if (flg) { 1326 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1327 } 1328 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1329 PetscFunctionReturn(0); 1330 } 1331 1332 #undef __FUNCT__ 1333 #define __FUNCT__ "MatCopy_MPISBAIJ" 1334 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str) 1335 { 1336 PetscErrorCode ierr; 1337 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1338 Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data; 1339 1340 PetscFunctionBegin; 1341 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
*/ 1342 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1343 ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr); 1344 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1345 ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr); 1346 } else { 1347 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1348 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1349 } 1350 PetscFunctionReturn(0); 1351 } 1352 1353 #undef __FUNCT__ 1354 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ" 1355 PetscErrorCode MatSetUpPreallocation_MPISBAIJ(Mat A) 1356 { 1357 PetscErrorCode ierr; 1358 1359 PetscFunctionBegin; 1360 ierr = MatMPISBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1361 PetscFunctionReturn(0); 1362 } 1363 1364 #undef __FUNCT__ 1365 #define __FUNCT__ "MatAXPY_MPISBAIJ" 1366 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1367 { 1368 PetscErrorCode ierr; 1369 Mat_MPISBAIJ *xx=(Mat_MPISBAIJ *)X->data,*yy=(Mat_MPISBAIJ *)Y->data; 1370 PetscBLASInt bnz,one=1; 1371 Mat_SeqSBAIJ *xa,*ya; 1372 Mat_SeqBAIJ *xb,*yb; 1373 1374 PetscFunctionBegin; 1375 if (str == SAME_NONZERO_PATTERN) { 1376 PetscScalar alpha = a; 1377 xa = (Mat_SeqSBAIJ *)xx->A->data; 1378 ya = (Mat_SeqSBAIJ *)yy->A->data; 1379 bnz = PetscBLASIntCast(xa->nz); 1380 BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one); 1381 xb = (Mat_SeqBAIJ *)xx->B->data; 1382 yb = (Mat_SeqBAIJ *)yy->B->data; 1383 bnz = PetscBLASIntCast(xb->nz); 1384 BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one); 1385 } else { 1386 ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr); 1387 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1388 ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr); 1389 } 1390 PetscFunctionReturn(0); 1391 } 1392 1393 #undef __FUNCT__ 1394 #define __FUNCT__ "MatSetBlockSize_MPISBAIJ" 1395 PetscErrorCode MatSetBlockSize_MPISBAIJ(Mat A,PetscInt bs) 1396 { 1397 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1398 PetscInt rbs,cbs; 1399 PetscErrorCode ierr; 1400 1401 PetscFunctionBegin; 1402 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1403 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1404 ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr); 1405 ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr); 1406 if (rbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with SBAIJ %d",bs,rbs); 1407 if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with SBAIJ %d",bs,cbs); 1408 PetscFunctionReturn(0); 1409 } 1410 1411 #undef __FUNCT__ 1412 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ" 1413 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[]) 1414 { 1415 PetscErrorCode ierr; 1416 PetscInt i; 1417 PetscTruth flg; 1418 1419 PetscFunctionBegin; 1420 for (i=0; i<n; i++) { 1421 ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr); 1422 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices"); 1423 } 1424 ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr); 1425 PetscFunctionReturn(0); 1426 } 1427 1428 1429 /* -------------------------------------------------------------------*/ 1430 static struct _MatOps MatOps_Values = { 1431 MatSetValues_MPISBAIJ, 1432 MatGetRow_MPISBAIJ, 1433 MatRestoreRow_MPISBAIJ, 1434 MatMult_MPISBAIJ, 1435 /* 4*/ MatMultAdd_MPISBAIJ, 1436 MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */ 1437 MatMultAdd_MPISBAIJ, 1438 0, 1439 0, 1440 0, 1441 /*10*/ 0, 1442 
0, 1443 0, 1444 MatSOR_MPISBAIJ, 1445 MatTranspose_MPISBAIJ, 1446 /*15*/ MatGetInfo_MPISBAIJ, 1447 MatEqual_MPISBAIJ, 1448 MatGetDiagonal_MPISBAIJ, 1449 MatDiagonalScale_MPISBAIJ, 1450 MatNorm_MPISBAIJ, 1451 /*20*/ MatAssemblyBegin_MPISBAIJ, 1452 MatAssemblyEnd_MPISBAIJ, 1453 MatSetOption_MPISBAIJ, 1454 MatZeroEntries_MPISBAIJ, 1455 /*24*/ 0, 1456 0, 1457 0, 1458 0, 1459 0, 1460 /*29*/ MatSetUpPreallocation_MPISBAIJ, 1461 0, 1462 0, 1463 0, 1464 0, 1465 /*34*/ MatDuplicate_MPISBAIJ, 1466 0, 1467 0, 1468 0, 1469 0, 1470 /*39*/ MatAXPY_MPISBAIJ, 1471 MatGetSubMatrices_MPISBAIJ, 1472 MatIncreaseOverlap_MPISBAIJ, 1473 MatGetValues_MPISBAIJ, 1474 MatCopy_MPISBAIJ, 1475 /*44*/ 0, 1476 MatScale_MPISBAIJ, 1477 0, 1478 0, 1479 0, 1480 /*49*/ MatSetBlockSize_MPISBAIJ, 1481 0, 1482 0, 1483 0, 1484 0, 1485 /*54*/ 0, 1486 0, 1487 MatSetUnfactored_MPISBAIJ, 1488 0, 1489 MatSetValuesBlocked_MPISBAIJ, 1490 /*59*/ 0, 1491 0, 1492 0, 1493 0, 1494 0, 1495 /*64*/ 0, 1496 0, 1497 0, 1498 0, 1499 0, 1500 /*69*/ MatGetRowMaxAbs_MPISBAIJ, 1501 0, 1502 0, 1503 0, 1504 0, 1505 /*74*/ 0, 1506 0, 1507 0, 1508 0, 1509 0, 1510 /*79*/ 0, 1511 0, 1512 0, 1513 0, 1514 MatLoad_MPISBAIJ, 1515 /*84*/ 0, 1516 0, 1517 0, 1518 0, 1519 0, 1520 /*89*/ 0, 1521 0, 1522 0, 1523 0, 1524 0, 1525 /*94*/ 0, 1526 0, 1527 0, 1528 0, 1529 0, 1530 /*99*/ 0, 1531 0, 1532 0, 1533 0, 1534 0, 1535 /*104*/0, 1536 MatRealPart_MPISBAIJ, 1537 MatImaginaryPart_MPISBAIJ, 1538 MatGetRowUpperTriangular_MPISBAIJ, 1539 MatRestoreRowUpperTriangular_MPISBAIJ 1540 }; 1541 1542 1543 EXTERN_C_BEGIN 1544 #undef __FUNCT__ 1545 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ" 1546 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 1547 { 1548 PetscFunctionBegin; 1549 *a = ((Mat_MPISBAIJ *)A->data)->A; 1550 *iscopy = PETSC_FALSE; 1551 PetscFunctionReturn(0); 1552 } 1553 EXTERN_C_END 1554 1555 EXTERN_C_BEGIN 1556 #undef __FUNCT__ 1557 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ" 1558 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz) 1559 { 1560 Mat_MPISBAIJ *b; 1561 PetscErrorCode ierr; 1562 PetscInt i,mbs,Mbs,newbs = PetscAbs(bs); 1563 1564 PetscFunctionBegin; 1565 if (bs < 0){ 1566 ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPISBAIJ matrix","Mat");CHKERRQ(ierr); 1567 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr); 1568 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1569 bs = PetscAbs(bs); 1570 } 1571 if ((d_nnz || o_nnz) && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz"); 1572 bs = newbs; 1573 1574 if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3; 1575 if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1; 1576 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 1577 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 1578 1579 B->rmap->bs = B->cmap->bs = bs; 1580 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 1581 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 1582 1583 if (d_nnz) { 1584 for (i=0; i<B->rmap->n/bs; i++) { 1585 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value 
%D",i,d_nnz[i]); 1586 } 1587 } 1588 if (o_nnz) { 1589 for (i=0; i<B->rmap->n/bs; i++) { 1590 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 1591 } 1592 } 1593 1594 b = (Mat_MPISBAIJ*)B->data; 1595 mbs = B->rmap->n/bs; 1596 Mbs = B->rmap->N/bs; 1597 if (mbs*bs != B->rmap->n) { 1598 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->N,bs); 1599 } 1600 1601 B->rmap->bs = bs; 1602 b->bs2 = bs*bs; 1603 b->mbs = mbs; 1604 b->nbs = mbs; 1605 b->Mbs = Mbs; 1606 b->Nbs = Mbs; 1607 1608 for (i=0; i<=b->size; i++) { 1609 b->rangebs[i] = B->rmap->range[i]/bs; 1610 } 1611 b->rstartbs = B->rmap->rstart/bs; 1612 b->rendbs = B->rmap->rend/bs; 1613 1614 b->cstartbs = B->cmap->rstart/bs; 1615 b->cendbs = B->cmap->rend/bs; 1616 1617 if (!B->preallocated) { 1618 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 1619 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 1620 ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr); 1621 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 1622 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 1623 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 1624 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 1625 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 1626 ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr); 1627 } 1628 1629 ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 1630 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 1631 B->preallocated = PETSC_TRUE; 1632 PetscFunctionReturn(0); 1633 } 1634 EXTERN_C_END 1635 1636 EXTERN_C_BEGIN 1637 #if defined(PETSC_HAVE_MUMPS) 1638 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_sbaij_mumps(Mat,MatFactorType,Mat*); 1639 #endif 1640 #if defined(PETSC_HAVE_SPOOLES) 1641 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_mpisbaij_spooles(Mat,MatFactorType,Mat*); 1642 #endif 1643 #if defined(PETSC_HAVE_PASTIX) 1644 extern PetscErrorCode MatGetFactor_mpisbaij_pastix(Mat,MatFactorType,Mat*); 1645 #endif 1646 EXTERN_C_END 1647 1648 /*MC 1649 MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices, 1650 based on block compressed sparse row format. Only the upper triangular portion of the matrix is stored. 1651 1652 Options Database Keys: 1653 . 
-mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions() 1654 1655 Level: beginner 1656 1657 .seealso: MatCreateMPISBAIJ 1658 M*/ 1659 1660 EXTERN_C_BEGIN 1661 #undef __FUNCT__ 1662 #define __FUNCT__ "MatCreate_MPISBAIJ" 1663 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPISBAIJ(Mat B) 1664 { 1665 Mat_MPISBAIJ *b; 1666 PetscErrorCode ierr; 1667 PetscTruth flg; 1668 1669 PetscFunctionBegin; 1670 1671 ierr = PetscNewLog(B,Mat_MPISBAIJ,&b);CHKERRQ(ierr); 1672 B->data = (void*)b; 1673 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1674 1675 B->ops->destroy = MatDestroy_MPISBAIJ; 1676 B->ops->view = MatView_MPISBAIJ; 1677 B->mapping = 0; 1678 B->assembled = PETSC_FALSE; 1679 1680 B->insertmode = NOT_SET_VALUES; 1681 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 1682 ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr); 1683 1684 /* build local table of row and column ownerships */ 1685 ierr = PetscMalloc((b->size+2)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr); 1686 1687 /* build cache for off array entries formed */ 1688 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 1689 b->donotstash = PETSC_FALSE; 1690 b->colmap = PETSC_NULL; 1691 b->garray = PETSC_NULL; 1692 b->roworiented = PETSC_TRUE; 1693 1694 /* stuff used in block assembly */ 1695 b->barray = 0; 1696 1697 /* stuff used for matrix vector multiply */ 1698 b->lvec = 0; 1699 b->Mvctx = 0; 1700 b->slvec0 = 0; 1701 b->slvec0b = 0; 1702 b->slvec1 = 0; 1703 b->slvec1a = 0; 1704 b->slvec1b = 0; 1705 b->sMvctx = 0; 1706 1707 /* stuff for MatGetRow() */ 1708 b->rowindices = 0; 1709 b->rowvalues = 0; 1710 b->getrowactive = PETSC_FALSE; 1711 1712 /* hash table stuff */ 1713 b->ht = 0; 1714 b->hd = 0; 1715 b->ht_size = 0; 1716 b->ht_flag = PETSC_FALSE; 1717 b->ht_fact = 0; 1718 b->ht_total_ct = 0; 1719 b->ht_insert_ct = 0; 1720 1721 b->in_loc = 0; 1722 b->v_loc = 0; 1723 b->n_loc = 0; 1724 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr); 1725 ierr = PetscOptionsTruth("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 1726 if (flg) { 1727 PetscReal fact = 1.39; 1728 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 1729 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr); 1730 if (fact <= 1.0) fact = 1.39; 1731 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 1732 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 1733 } 1734 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1735 1736 #if defined(PETSC_HAVE_PASTIX) 1737 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C", 1738 "MatGetFactor_mpisbaij_pastix", 1739 MatGetFactor_mpisbaij_pastix);CHKERRQ(ierr); 1740 #endif 1741 #if defined(PETSC_HAVE_MUMPS) 1742 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", 1743 "MatGetFactor_sbaij_mumps", 1744 MatGetFactor_sbaij_mumps);CHKERRQ(ierr); 1745 #endif 1746 #if defined(PETSC_HAVE_SPOOLES) 1747 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C", 1748 "MatGetFactor_mpisbaij_spooles", 1749 MatGetFactor_mpisbaij_spooles);CHKERRQ(ierr); 1750 #endif 1751 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1752 "MatStoreValues_MPISBAIJ", 
1753 MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1754 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1755 "MatRetrieveValues_MPISBAIJ",
1756 MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1757 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1758 "MatGetDiagonalBlock_MPISBAIJ",
1759 MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1760 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
1761 "MatMPISBAIJSetPreallocation_MPISBAIJ",
1762 MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
1763 B->symmetric = PETSC_TRUE;
1764 B->structurally_symmetric = PETSC_TRUE;
1765 B->symmetric_set = PETSC_TRUE;
1766 B->structurally_symmetric_set = PETSC_TRUE;
1767 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr);
1768 PetscFunctionReturn(0);
1769 }
1770 EXTERN_C_END
1771
1772 /*MC
1773 MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1774
1775 This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1776 and MATMPISBAIJ otherwise.
1777
1778 Options Database Keys:
1779 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
1780
1781 Level: beginner
1782
1783 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
1784 M*/
1785
1786 EXTERN_C_BEGIN
1787 #undef __FUNCT__
1788 #define __FUNCT__ "MatCreate_SBAIJ"
1789 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SBAIJ(Mat A)
1790 {
1791 PetscErrorCode ierr;
1792 PetscMPIInt size;
1793
1794 PetscFunctionBegin;
1795 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
1796 if (size == 1) {
1797 ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr);
1798 } else {
1799 ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
1800 }
1801 PetscFunctionReturn(0);
1802 }
1803 EXTERN_C_END
1804
1805 #undef __FUNCT__
1806 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1807 /*@C
1808 MatMPISBAIJSetPreallocation - For good matrix assembly performance
1809 the user should preallocate the matrix storage by setting the parameters
1810 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
1811 performance can be increased by more than a factor of 50.
1812
1813 Collective on Mat
1814
1815 Input Parameters:
1816 + A - the matrix
1817 . bs - size of block
1818 . d_nz - number of block nonzeros per block row in diagonal portion of local
1819 submatrix (same for all local rows)
1820 . d_nnz - array containing the number of block nonzeros in the various block rows
1821 in the upper triangular and diagonal part of the diagonal portion of the local
1822 submatrix (possibly different for each block row) or PETSC_NULL. You must leave room
1823 for the diagonal entry even if it is zero.
1824 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local
1825 submatrix (same for all local rows).
1826 - o_nnz - array containing the number of nonzeros in the various block rows of the
1827 off-diagonal portion of the local submatrix (possibly different for
1828 each block row) or PETSC_NULL.
1829
1830
1831 Options Database Keys:
1832 . -mat_no_unroll - uses code that does not unroll the loops in the
1833 block calculations (much slower)
1834 . -mat_block_size - size of the blocks to use
1835
1836 Notes:
1837
1838 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1839 then it must be used on all processors that share the object for that argument.
1840
1841 If the *_nnz parameter is given then the *_nz parameter is ignored.
1842
1843 Storage Information:
1844 For a square global matrix we define each processor's diagonal portion
1845 to be its local rows and the corresponding columns (a square submatrix);
1846 each processor's off-diagonal portion encompasses the remainder of the
1847 local matrix (a rectangular submatrix).
1848
1849 The user can specify preallocated storage for the diagonal part of
1850 the local submatrix with either d_nz or d_nnz (not both). Set
1851 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1852 memory allocation. Likewise, specify preallocated storage for the
1853 off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1854
1855 You can call MatGetInfo() to get information on how effective the preallocation was;
1856 for example the fields mallocs, nz_allocated, nz_used, nz_unneeded.
1857 You can also run with the option -info and look for messages with the string
1858 malloc in them to see if additional memory allocation was needed.
1859
1860 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1861 the figure below we depict these three local rows and all columns (0-11).
1862
1863 .vb
1864 0 1 2 3 4 5 6 7 8 9 10 11
1865 -------------------
1866 row 3 | o o o d d d o o o o o o
1867 row 4 | o o o d d d o o o o o o
1868 row 5 | o o o d d d o o o o o o
1869 -------------------
1870 .ve
1871
1872 Thus, any entries in the d locations are stored in the d (diagonal)
1873 submatrix, and any entries in the o locations are stored in the
1874 o (off-diagonal) submatrix. Note that the d matrix is stored in
1875 MATSEQSBAIJ format and the o submatrix in MATSEQBAIJ format.
1876
1877 Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1878 plus the diagonal part of the d matrix,
1879 and o_nz should indicate the number of block nonzeros per row in the o matrix.
1880 In general, for PDE problems in which most nonzeros are near the diagonal,
1881 one expects d_nz >> o_nz. For large problems you MUST preallocate memory
1882 or you will get TERRIBLE performance; see the users' manual chapter on
1883 matrices.
1884
1885 Level: intermediate
1886
1887 .keywords: matrix, block, aij, compressed row, sparse, parallel
1888
1889 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1890 @*/
1891 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1892 {
1893 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
1894
1895 PetscFunctionBegin;
1896 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1897 if (f) {
1898 ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1899 }
1900 PetscFunctionReturn(0);
1901 }
1902
1903 #undef __FUNCT__
1904 #define __FUNCT__ "MatCreateMPISBAIJ"
1905 /*@C
1906 MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1907 (block compressed row). For good matrix assembly performance
1908 the user should preallocate the matrix storage by setting the parameters
1909 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
1910 performance can be increased by more than a factor of 50.
1911
1912 Collective on MPI_Comm
1913
1914 Input Parameters:
1915 + comm - MPI communicator
1916 . bs - size of block
1917 .
m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1918 This value should be the same as the local size used in creating the
1919 y vector for the matrix-vector product y = Ax.
1920 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1921 This value should be the same as the local size used in creating the
1922 x vector for the matrix-vector product y = Ax.
1923 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1924 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1925 . d_nz - number of block nonzeros per block row in diagonal portion of local
1926 submatrix (same for all local rows)
1927 . d_nnz - array containing the number of block nonzeros in the various block rows
1928 in the upper triangular portion of the diagonal portion of the local
1929 submatrix (possibly different for each block row) or PETSC_NULL.
1930 You must leave room for the diagonal entry even if it is zero.
1931 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local
1932 submatrix (same for all local rows).
1933 - o_nnz - array containing the number of nonzeros in the various block rows of the
1934 off-diagonal portion of the local submatrix (possibly different for
1935 each block row) or PETSC_NULL.
1936
1937 Output Parameter:
1938 . A - the matrix
1939
1940 Options Database Keys:
1941 . -mat_no_unroll - uses code that does not unroll the loops in the
1942 block calculations (much slower)
1943 . -mat_block_size - size of the blocks to use
1944 . -mat_mpi - use the parallel matrix data structures even on one processor
1945 (defaults to using SeqBAIJ format on one processor)
1946
1947 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
1948 MatXXXXSetPreallocation() paradigm instead of this routine directly.
1949 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
1950
1951 Notes:
1952 The number of rows and columns must be divisible by blocksize.
1953 This matrix type does not support complex Hermitian operations.
1954
1955 The user MUST specify either the local or global matrix dimensions
1956 (possibly both).
1957
1958 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1959 then it must be used on all processors that share the object for that argument.
1960
1961 If the *_nnz parameter is given then the *_nz parameter is ignored.
1962
1963 Storage Information:
1964 For a square global matrix we define each processor's diagonal portion
1965 to be its local rows and the corresponding columns (a square submatrix);
1966 each processor's off-diagonal portion encompasses the remainder of the
1967 local matrix (a rectangular submatrix).
1968
1969 The user can specify preallocated storage for the diagonal part of
1970 the local submatrix with either d_nz or d_nnz (not both). Set
1971 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1972 memory allocation. Likewise, specify preallocated storage for the
1973 off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1974
1975 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1976 the figure below we depict these three local rows and all columns (0-11).
1977 1978 .vb 1979 0 1 2 3 4 5 6 7 8 9 10 11 1980 ------------------- 1981 row 3 | o o o d d d o o o o o o 1982 row 4 | o o o d d d o o o o o o 1983 row 5 | o o o d d d o o o o o o 1984 ------------------- 1985 .ve 1986 1987 Thus, any entries in the d locations are stored in the d (diagonal) 1988 submatrix, and any entries in the o locations are stored in the 1989 o (off-diagonal) submatrix. Note that the d matrix is stored in 1990 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 1991 1992 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 1993 plus the diagonal part of the d matrix, 1994 and o_nz should indicate the number of block nonzeros per row in the o matrix. 1995 In general, for PDE problems in which most nonzeros are near the diagonal, 1996 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1997 or you will get TERRIBLE performance; see the users' manual chapter on 1998 matrices. 1999 2000 Level: intermediate 2001 2002 .keywords: matrix, block, aij, compressed row, sparse, parallel 2003 2004 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2005 @*/ 2006 2007 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPISBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2008 { 2009 PetscErrorCode ierr; 2010 PetscMPIInt size; 2011 2012 PetscFunctionBegin; 2013 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2014 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2015 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2016 if (size > 1) { 2017 ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr); 2018 ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2019 } else { 2020 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 2021 ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2022 } 2023 PetscFunctionReturn(0); 2024 } 2025 2026 2027 #undef __FUNCT__ 2028 #define __FUNCT__ "MatDuplicate_MPISBAIJ" 2029 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2030 { 2031 Mat mat; 2032 Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data; 2033 PetscErrorCode ierr; 2034 PetscInt len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs; 2035 PetscScalar *array; 2036 2037 PetscFunctionBegin; 2038 *newmat = 0; 2039 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 2040 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2041 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2042 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2043 ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2044 ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2045 2046 mat->factortype = matin->factortype; 2047 mat->preallocated = PETSC_TRUE; 2048 mat->assembled = PETSC_TRUE; 2049 mat->insertmode = NOT_SET_VALUES; 2050 2051 a = (Mat_MPISBAIJ*)mat->data; 2052 a->bs2 = oldmat->bs2; 2053 a->mbs = oldmat->mbs; 2054 a->nbs = oldmat->nbs; 2055 a->Mbs = oldmat->Mbs; 2056 a->Nbs = oldmat->Nbs; 2057 2058 2059 a->size = oldmat->size; 2060 a->rank = oldmat->rank; 2061 a->donotstash = oldmat->donotstash; 2062 a->roworiented = oldmat->roworiented; 2063 a->rowindices = 0; 2064 a->rowvalues = 0; 2065 a->getrowactive = PETSC_FALSE; 2066 a->barray = 0; 2067 a->rstartbs = oldmat->rstartbs; 2068 a->rendbs = oldmat->rendbs; 2069 a->cstartbs = oldmat->cstartbs; 2070 
a->cendbs = oldmat->cendbs; 2071 2072 /* hash table stuff */ 2073 a->ht = 0; 2074 a->hd = 0; 2075 a->ht_size = 0; 2076 a->ht_flag = oldmat->ht_flag; 2077 a->ht_fact = oldmat->ht_fact; 2078 a->ht_total_ct = 0; 2079 a->ht_insert_ct = 0; 2080 2081 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr); 2082 if (oldmat->colmap) { 2083 #if defined (PETSC_USE_CTABLE) 2084 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2085 #else 2086 ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2087 ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2088 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2089 #endif 2090 } else a->colmap = 0; 2091 2092 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2093 ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2094 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2095 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 2096 } else a->garray = 0; 2097 2098 ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 2099 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2100 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2101 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2102 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2103 2104 ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr); 2105 ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr); 2106 ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr); 2107 ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr); 2108 2109 ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr); 2110 ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr); 2111 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr); 2112 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr); 2113 ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr); 2114 ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr); 2115 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr); 2116 ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr); 2117 ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr); 2118 ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr); 2119 ierr = PetscLogObjectParent(mat,a->slvec0b);CHKERRQ(ierr); 2120 ierr = PetscLogObjectParent(mat,a->slvec1a);CHKERRQ(ierr); 2121 ierr = PetscLogObjectParent(mat,a->slvec1b);CHKERRQ(ierr); 2122 2123 /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */ 2124 ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr); 2125 a->sMvctx = oldmat->sMvctx; 2126 ierr = PetscLogObjectParent(mat,a->sMvctx);CHKERRQ(ierr); 2127 2128 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2129 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2130 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2131 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2132 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2133 *newmat = mat; 2134 PetscFunctionReturn(0); 2135 } 2136 2137 #undef __FUNCT__ 2138 #define __FUNCT__ "MatLoad_MPISBAIJ" 2139 PetscErrorCode MatLoad_MPISBAIJ(PetscViewer viewer, const MatType type,Mat *newmat) 2140 { 2141 Mat A; 2142 PetscErrorCode ierr; 
2143 PetscInt i,nz,j,rstart,rend; 2144 PetscScalar *vals,*buf; 2145 MPI_Comm comm = ((PetscObject)viewer)->comm; 2146 MPI_Status status; 2147 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,*locrowlens,mmbs; 2148 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2149 PetscInt *procsnz = 0,jj,*mycols,*ibuf; 2150 PetscInt bs=1,Mbs,mbs,extra_rows; 2151 PetscInt *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2152 PetscInt dcount,kmax,k,nzcount,tmp; 2153 int fd; 2154 2155 PetscFunctionBegin; 2156 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr); 2157 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 2158 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2159 2160 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2161 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2162 if (!rank) { 2163 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2164 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2165 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2166 if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ"); 2167 } 2168 2169 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2170 M = header[1]; N = header[2]; 2171 2172 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 2173 2174 /* 2175 This code adds extra rows to make sure the number of rows is 2176 divisible by the blocksize 2177 */ 2178 Mbs = M/bs; 2179 extra_rows = bs - M + bs*(Mbs); 2180 if (extra_rows == bs) extra_rows = 0; 2181 else Mbs++; 2182 if (extra_rows &&!rank) { 2183 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 2184 } 2185 2186 /* determine ownership of all rows */ 2187 mbs = Mbs/size + ((Mbs % size) > rank); 2188 m = mbs*bs; 2189 ierr = PetscMalloc2(size+1,PetscMPIInt,&rowners,size+1,PetscMPIInt,&browners);CHKERRQ(ierr); 2190 mmbs = PetscMPIIntCast(mbs); 2191 ierr = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2192 rowners[0] = 0; 2193 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2194 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2195 rstart = rowners[rank]; 2196 rend = rowners[rank+1]; 2197 2198 /* distribute row lengths to all processors */ 2199 ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscMPIInt),&locrowlens);CHKERRQ(ierr); 2200 if (!rank) { 2201 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2202 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2203 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2204 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr); 2205 for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2206 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2207 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2208 } else { 2209 ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2210 } 2211 2212 if (!rank) { /* procs[0] */ 2213 /* calculate the number of nonzeros on each processor */ 2214 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2215 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2216 for (i=0; i<size; i++) { 2217 for 
(j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 2218 procsnz[i] += rowlengths[j]; 2219 } 2220 } 2221 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2222 2223 /* determine max buffer needed and allocate it */ 2224 maxnz = 0; 2225 for (i=0; i<size; i++) { 2226 maxnz = PetscMax(maxnz,procsnz[i]); 2227 } 2228 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2229 2230 /* read in my part of the matrix column indices */ 2231 nz = procsnz[0]; 2232 ierr = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 2233 mycols = ibuf; 2234 if (size == 1) nz -= extra_rows; 2235 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2236 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2237 2238 /* read in every ones (except the last) and ship off */ 2239 for (i=1; i<size-1; i++) { 2240 nz = procsnz[i]; 2241 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2242 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2243 } 2244 /* read in the stuff for the last proc */ 2245 if (size != 1) { 2246 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2247 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2248 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2249 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 2250 } 2251 ierr = PetscFree(cols);CHKERRQ(ierr); 2252 } else { /* procs[i], i>0 */ 2253 /* determine buffer space needed for message */ 2254 nz = 0; 2255 for (i=0; i<m; i++) { 2256 nz += locrowlens[i]; 2257 } 2258 ierr = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 2259 mycols = ibuf; 2260 /* receive message of column indices*/ 2261 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2262 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2263 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2264 } 2265 2266 /* loop over local rows, determining number of off diagonal entries */ 2267 ierr = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr); 2268 ierr = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr); 2269 ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2270 ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2271 ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2272 rowcount = 0; 2273 nzcount = 0; 2274 for (i=0; i<mbs; i++) { 2275 dcount = 0; 2276 odcount = 0; 2277 for (j=0; j<bs; j++) { 2278 kmax = locrowlens[rowcount]; 2279 for (k=0; k<kmax; k++) { 2280 tmp = mycols[nzcount++]/bs; /* block col. 
index */ 2281 if (!mask[tmp]) { 2282 mask[tmp] = 1; 2283 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */ 2284 else masked1[dcount++] = tmp; /* entry in diag portion */ 2285 } 2286 } 2287 rowcount++; 2288 } 2289 2290 dlens[i] = dcount; /* d_nzz[i] */ 2291 odlens[i] = odcount; /* o_nzz[i] */ 2292 2293 /* zero out the mask elements we set */ 2294 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2295 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2296 } 2297 2298 /* create our matrix */ 2299 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 2300 ierr = MatSetSizes(A,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2301 ierr = MatSetType(A,type);CHKERRQ(ierr); 2302 ierr = MatSetOption(A,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 2303 ierr = MatMPISBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr); 2304 2305 if (!rank) { 2306 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2307 /* read in my part of the matrix numerical values */ 2308 nz = procsnz[0]; 2309 vals = buf; 2310 mycols = ibuf; 2311 if (size == 1) nz -= extra_rows; 2312 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2313 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2314 2315 /* insert into matrix */ 2316 jj = rstart*bs; 2317 for (i=0; i<m; i++) { 2318 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2319 mycols += locrowlens[i]; 2320 vals += locrowlens[i]; 2321 jj++; 2322 } 2323 2324 /* read in other processors (except the last one) and ship out */ 2325 for (i=1; i<size-1; i++) { 2326 nz = procsnz[i]; 2327 vals = buf; 2328 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2329 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr); 2330 } 2331 /* the last proc */ 2332 if (size != 1){ 2333 nz = procsnz[i] - extra_rows; 2334 vals = buf; 2335 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2336 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2337 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)A)->tag,comm);CHKERRQ(ierr); 2338 } 2339 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2340 2341 } else { 2342 /* receive numeric values */ 2343 ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2344 2345 /* receive message of values*/ 2346 vals = buf; 2347 mycols = ibuf; 2348 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr); 2349 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2350 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2351 2352 /* insert into matrix */ 2353 jj = rstart*bs; 2354 for (i=0; i<m; i++) { 2355 ierr = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2356 mycols += locrowlens[i]; 2357 vals += locrowlens[i]; 2358 jj++; 2359 } 2360 } 2361 2362 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2363 ierr = PetscFree(buf);CHKERRQ(ierr); 2364 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2365 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 2366 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 2367 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 2368 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2369 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2370 *newmat = A; 2371 PetscFunctionReturn(0); 2372 } 2373 2374 #undef __FUNCT__ 2375 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor" 2376 /*XXXXX@ 2377 MatMPISBAIJSetHashTableFactor - Sets the factor 
required to compute the size of the HashTable. 2378 2379 Input Parameters: 2380 . mat - the matrix 2381 . fact - factor 2382 2383 Collective on Mat 2384 2385 Level: advanced 2386 2387 Notes: 2388 This can also be set by the command line option: -mat_use_hash_table fact 2389 2390 .keywords: matrix, hashtable, factor, HT 2391 2392 .seealso: MatSetOption() 2393 @XXXXX*/ 2394 2395 2396 #undef __FUNCT__ 2397 #define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ" 2398 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[]) 2399 { 2400 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 2401 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(a->B)->data; 2402 PetscReal atmp; 2403 PetscReal *work,*svalues,*rvalues; 2404 PetscErrorCode ierr; 2405 PetscInt i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol; 2406 PetscMPIInt rank,size; 2407 PetscInt *rowners_bs,dest,count,source; 2408 PetscScalar *va; 2409 MatScalar *ba; 2410 MPI_Status stat; 2411 2412 PetscFunctionBegin; 2413 if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov"); 2414 ierr = MatGetRowMaxAbs(a->A,v,PETSC_NULL);CHKERRQ(ierr); 2415 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2416 2417 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 2418 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 2419 2420 bs = A->rmap->bs; 2421 mbs = a->mbs; 2422 Mbs = a->Mbs; 2423 ba = b->a; 2424 bi = b->i; 2425 bj = b->j; 2426 2427 /* find ownerships */ 2428 rowners_bs = A->rmap->range; 2429 2430 /* each proc creates an array to be distributed */ 2431 ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr); 2432 ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr); 2433 2434 /* row_max for B */ 2435 if (rank != size-1){ 2436 for (i=0; i<mbs; i++) { 2437 ncols = bi[1] - bi[0]; bi++; 2438 brow = bs*i; 2439 for (j=0; j<ncols; j++){ 2440 bcol = bs*(*bj); 2441 for (kcol=0; kcol<bs; kcol++){ 2442 col = bcol + kcol; /* local col index */ 2443 col += rowners_bs[rank+1]; /* global col index */ 2444 for (krow=0; krow<bs; krow++){ 2445 atmp = PetscAbsScalar(*ba); ba++; 2446 row = brow + krow; /* local row index */ 2447 if (PetscRealPart(va[row]) < atmp) va[row] = atmp; 2448 if (work[col] < atmp) work[col] = atmp; 2449 } 2450 } 2451 bj++; 2452 } 2453 } 2454 2455 /* send values to its owners */ 2456 for (dest=rank+1; dest<size; dest++){ 2457 svalues = work + rowners_bs[dest]; 2458 count = rowners_bs[dest+1]-rowners_bs[dest]; 2459 ierr = MPI_Send(svalues,count,MPIU_REAL,dest,rank,((PetscObject)A)->comm);CHKERRQ(ierr); 2460 } 2461 } 2462 2463 /* receive values */ 2464 if (rank){ 2465 rvalues = work; 2466 count = rowners_bs[rank+1]-rowners_bs[rank]; 2467 for (source=0; source<rank; source++){ 2468 ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,((PetscObject)A)->comm,&stat);CHKERRQ(ierr); 2469 /* process values */ 2470 for (i=0; i<count; i++){ 2471 if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i]; 2472 } 2473 } 2474 } 2475 2476 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2477 ierr = PetscFree(work);CHKERRQ(ierr); 2478 PetscFunctionReturn(0); 2479 } 2480 2481 #undef __FUNCT__ 2482 #define __FUNCT__ "MatSOR_MPISBAIJ" 2483 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2484 { 2485 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2486 PetscErrorCode ierr; 2487 PetscInt mbs=mat->mbs,bs=matin->rmap->bs; 2488 PetscScalar *x,*ptr,*from; 2489 Vec bb1; 2490 const PetscScalar *b; 2491 2492 PetscFunctionBegin; 
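/*
     Editor's summary of the routine below (derived from the code itself):
     SOR/SSOR relaxation for the MPISBAIJ format; only block size 1 is supported.
     Because just the upper triangle of the matrix is stored, the local symmetric
     sweep first assembles a modified right-hand side bb1 = bb - (off-diagonal
     coupling)*x using the slvec0/slvec1 work vectors and the sMvctx scatter
     (both the B and B^T contributions are folded in), and then delegates the
     actual sweep to the sequential SOR of the diagonal block A.  SOR_APPLY_UPPER,
     single local forward or backward sweeps with a zero initial guess, and
     SOR_EISENSTAT are handled as special cases further down.
*/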
2493 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 2494 if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2495 2496 if (flag == SOR_APPLY_UPPER) { 2497 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2498 PetscFunctionReturn(0); 2499 } 2500 2501 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2502 if ( flag & SOR_ZERO_INITIAL_GUESS ) { 2503 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2504 its--; 2505 } 2506 2507 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2508 while (its--){ 2509 2510 /* lower triangular part: slvec0b = - B^T*xx */ 2511 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2512 2513 /* copy xx into slvec0a */ 2514 ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2515 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2516 ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2517 ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2518 2519 ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr); 2520 2521 /* copy bb into slvec1a */ 2522 ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2523 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 2524 ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2525 ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2526 2527 /* set slvec1b = 0 */ 2528 ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr); 2529 2530 ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2531 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2532 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 2533 ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2534 2535 /* upper triangular part: bb1 = bb1 - B*x */ 2536 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr); 2537 2538 /* local diagonal sweep */ 2539 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2540 } 2541 ierr = VecDestroy(bb1);CHKERRQ(ierr); 2542 } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)){ 2543 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2544 } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)){ 2545 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2546 } else if (flag & SOR_EISENSTAT) { 2547 Vec xx1; 2548 PetscTruth hasop; 2549 const PetscScalar *diag; 2550 PetscScalar *sl,scale = (omega - 2.0)/omega; 2551 PetscInt i,n; 2552 2553 if (!mat->xx1) { 2554 ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr); 2555 ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr); 2556 } 2557 xx1 = mat->xx1; 2558 bb1 = mat->bb1; 2559 2560 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 2561 2562 if (!mat->diag) { 2563 /* this is wrong for same matrix with new nonzero values */ 2564 ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr); 2565 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 2566 } 2567 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 2568 2569 if (hasop) { 2570 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 2571 ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr); 
2572 } else { 2573 /* 2574 These two lines are replaced by code that may be a bit faster for a good compiler 2575 ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr); 2576 ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr); 2577 */ 2578 ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr); 2579 ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr); 2580 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 2581 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2582 ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr); 2583 if (omega == 1.0) { 2584 for (i=0; i<n; i++) { 2585 sl[i] = b[i] - diag[i]*x[i]; 2586 } 2587 ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr); 2588 } else { 2589 for (i=0; i<n; i++) { 2590 sl[i] = b[i] + scale*diag[i]*x[i]; 2591 } 2592 ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr); 2593 } 2594 ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr); 2595 ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr); 2596 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 2597 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2598 } 2599 2600 /* multiply off-diagonal portion of matrix */ 2601 ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr); 2602 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2603 ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr); 2604 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2605 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2606 ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr); 2607 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2608 ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2609 ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2610 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr); 2611 2612 /* local sweep */ 2613 ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 2614 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 2615 } else { 2616 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2617 } 2618 PetscFunctionReturn(0); 2619 } 2620 2621 #undef __FUNCT__ 2622 #define __FUNCT__ "MatSOR_MPISBAIJ_2comm" 2623 PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2624 { 2625 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2626 PetscErrorCode ierr; 2627 Vec lvec1,bb1; 2628 2629 PetscFunctionBegin; 2630 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 2631 if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2632 2633 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2634 if ( flag & SOR_ZERO_INITIAL_GUESS ) { 2635 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2636 its--; 2637 } 2638 2639 ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr); 2640 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2641 while (its--){ 2642 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2643 2644 /* lower diagonal part: bb1 = bb - B^T*xx */ 2645 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr); 2646 ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr); 2647 2648 ierr = 
VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2649 ierr = VecCopy(bb,bb1);CHKERRQ(ierr); 2650 ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2651 2652 /* upper diagonal part: bb1 = bb1 - B*x */ 2653 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2654 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 2655 2656 ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2657 2658 /* diagonal sweep */ 2659 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2660 } 2661 ierr = VecDestroy(lvec1);CHKERRQ(ierr); 2662 ierr = VecDestroy(bb1);CHKERRQ(ierr); 2663 } else { 2664 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2665 } 2666 PetscFunctionReturn(0); 2667 } 2668 2669
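/*
   Editor's sketch (not part of the library source, kept disabled): a minimal
   illustration of the recommended MatCreate()/MatSetType()/MatMPISBAIJSetPreallocation()
   usage documented above.  The function name ExampleCreateSBAIJ and the block
   size/preallocation numbers are hypothetical; only the upper triangular blocks
   need to be preallocated and later inserted for this format.
*/
#if 0
static PetscErrorCode ExampleCreateSBAIJ(MPI_Comm comm,Mat *C)
{
  PetscErrorCode ierr;
  PetscInt       bs = 2,m = 10*bs;   /* 10 local block rows of (assumed) block size 2 */

  PetscFunctionBegin;
  ierr = MatCreate(comm,C);CHKERRQ(ierr);
  ierr = MatSetSizes(*C,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetType(*C,MATSBAIJ);CHKERRQ(ierr);      /* becomes MATSEQSBAIJ or MATMPISBAIJ depending on comm size */
  ierr = MatSetFromOptions(*C);CHKERRQ(ierr);
  /* Preallocate roughly 3 block nonzeros per block row in the diagonal portion and
     1 in the off-diagonal portion; each call is a no-op for the other matrix type. */
  ierr = MatMPISBAIJSetPreallocation(*C,bs,3,PETSC_NULL,1,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatSeqSBAIJSetPreallocation(*C,bs,3,PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif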