1 #define PETSCMAT_DLL 2 3 #include "../src/mat/impls/baij/mpi/mpibaij.h" /*I "petscmat.h" I*/ 4 #include "../src/mat/impls/sbaij/mpi/mpisbaij.h" 5 #include "../src/mat/impls/sbaij/seq/sbaij.h" 6 #include "petscblaslapack.h" 7 8 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat); 9 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat); 10 EXTERN PetscErrorCode DisAssemble_MPISBAIJ(Mat); 11 EXTERN PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt); 12 EXTERN PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []); 13 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []); 14 EXTERN PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode); 15 EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 16 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 17 EXTERN PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**); 18 EXTERN PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**); 19 EXTERN PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*); 20 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *); 21 EXTERN PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat,Vec,PetscInt[]); 22 EXTERN PetscErrorCode MatSOR_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec); 23 24 EXTERN_C_BEGIN 25 #undef __FUNCT__ 26 #define __FUNCT__ "MatStoreValues_MPISBAIJ" 27 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPISBAIJ(Mat mat) 28 { 29 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 30 PetscErrorCode ierr; 31 32 PetscFunctionBegin; 33 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 34 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 35 PetscFunctionReturn(0); 36 } 37 EXTERN_C_END 38 39 EXTERN_C_BEGIN 40 #undef __FUNCT__ 41 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ" 42 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPISBAIJ(Mat mat) 43 { 44 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 45 PetscErrorCode ierr; 46 47 PetscFunctionBegin; 48 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 49 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 50 PetscFunctionReturn(0); 51 } 52 EXTERN_C_END 53 54 55 #define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \ 56 { \ 57 \ 58 brow = row/bs; \ 59 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 60 rmax = aimax[brow]; nrow = ailen[brow]; \ 61 bcol = col/bs; \ 62 ridx = row % bs; cidx = col % bs; \ 63 low = 0; high = nrow; \ 64 while (high-low > 3) { \ 65 t = (low+high)/2; \ 66 if (rp[t] > bcol) high = t; \ 67 else low = t; \ 68 } \ 69 for (_i=low; _i<high; _i++) { \ 70 if (rp[_i] > bcol) break; \ 71 if (rp[_i] == bcol) { \ 72 bap = ap + bs2*_i + bs*cidx + ridx; \ 73 if (addv == ADD_VALUES) *bap += value; \ 74 else *bap = value; \ 75 goto a_noinsert; \ 76 } \ 77 } \ 78 if (a->nonew == 1) goto a_noinsert; \ 79 if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 80 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 81 N = nrow++ - 1; \ 82 /* shift up all the later entries in this row */ \ 83 for (ii=N; ii>=_i; ii--) { \ 84 rp[ii+1] = rp[ii]; \ 85 ierr = 
PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \
    rp[_i]                      = bcol; \
    ap[bs2*_i + bs*cidx + ridx] = value; \
    a_noinsert:; \
    ailen[brow] = nrow; \
}

#define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else                    *bap  = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \
    rp[_i]                      = bcol; \
    ap[bs2*_i + bs*cidx + ridx] = value; \
    b_noinsert:; \
    bilen[brow] = nrow; \
}

/* Only add/insert a(i,j) with i<=j (blocks).
   Any a(i,j) with i>j input by the user is ignored.
*/
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPISBAIJ"
PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  MatScalar      value;
  PetscTruth     roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig=mat->cmap->rend,bs=mat->rmap->bs;

  /* Some variables required in the macros above */
  Mat          A      = baij->A;
  Mat_SeqSBAIJ *a     = (Mat_SeqSBAIJ*)(A)->data;
  PetscInt     *aimax = a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar    *aa    = a->a;

  Mat         B      = baij->B;
  Mat_SeqBAIJ *b     = (Mat_SeqBAIJ*)(B)->data;
  PetscInt    *bimax = b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba    = b->a;

  PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar *ap,*bap;

  /* for stash */
  PetscInt  n_loc, *in_loc = PETSC_NULL;
  MatScalar *v_loc = PETSC_NULL;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  if (!baij->donotstash){
    if (n > baij->n_loc) {
      ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
      ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
      ierr = PetscMalloc(n*sizeof(PetscInt),&baij->in_loc);CHKERRQ(ierr);
      ierr = PetscMalloc(n*sizeof(MatScalar),&baij->v_loc);CHKERRQ(ierr);
      baij->n_loc = n;
    }
    in_loc = baij->in_loc;
    v_loc  = baij->v_loc;
  }

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max
%D",im[i],mat->rmap->N-1); 185 #endif 186 if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */ 187 row = im[i] - rstart_orig; /* local row index */ 188 for (j=0; j<n; j++) { 189 if (im[i]/bs > in[j]/bs){ 190 if (a->ignore_ltriangular){ 191 continue; /* ignore lower triangular blocks */ 192 } else { 193 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 194 } 195 } 196 if (in[j] >= cstart_orig && in[j] < cend_orig){ /* diag entry (A) */ 197 col = in[j] - cstart_orig; /* local col index */ 198 brow = row/bs; bcol = col/bs; 199 if (brow > bcol) continue; /* ignore lower triangular blocks of A */ 200 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 201 MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv); 202 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 203 } else if (in[j] < 0) continue; 204 #if defined(PETSC_USE_DEBUG) 205 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 206 #endif 207 else { /* off-diag entry (B) */ 208 if (mat->was_assembled) { 209 if (!baij->colmap) { 210 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 211 } 212 #if defined (PETSC_USE_CTABLE) 213 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 214 col = col - 1; 215 #else 216 col = baij->colmap[in[j]/bs] - 1; 217 #endif 218 if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) { 219 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 220 col = in[j]; 221 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 222 B = baij->B; 223 b = (Mat_SeqBAIJ*)(B)->data; 224 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 225 ba=b->a; 226 } else col += in[j]%bs; 227 } else col = in[j]; 228 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 229 MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv); 230 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 231 } 232 } 233 } else { /* off processor entry */ 234 if (!baij->donotstash) { 235 n_loc = 0; 236 for (j=0; j<n; j++){ 237 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 238 in_loc[n_loc] = in[j]; 239 if (roworiented) { 240 v_loc[n_loc] = v[i*n+j]; 241 } else { 242 v_loc[n_loc] = v[j*m+i]; 243 } 244 n_loc++; 245 } 246 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr); 247 } 248 } 249 } 250 PetscFunctionReturn(0); 251 } 252 253 #undef __FUNCT__ 254 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ" 255 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 256 { 257 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 258 const MatScalar *value; 259 MatScalar *barray=baij->barray; 260 PetscTruth roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular; 261 PetscErrorCode ierr; 262 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 263 PetscInt rend=baij->rendbs,cstart=baij->rstartbs,stepval; 264 PetscInt cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2; 265 266 PetscFunctionBegin; 267 if(!barray) { 268 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 269 baij->barray = barray; 270 } 271 272 if (roworiented) { 273 stepval = (n-1)*bs; 274 } else { 275 stepval = 
(m-1)*bs; 276 } 277 for (i=0; i<m; i++) { 278 if (im[i] < 0) continue; 279 #if defined(PETSC_USE_DEBUG) 280 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 281 #endif 282 if (im[i] >= rstart && im[i] < rend) { 283 row = im[i] - rstart; 284 for (j=0; j<n; j++) { 285 if (im[i] > in[j]) { 286 if (ignore_ltriangular) continue; /* ignore lower triangular blocks */ 287 else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 288 } 289 /* If NumCol = 1 then a copy is not required */ 290 if ((roworiented) && (n == 1)) { 291 barray = (MatScalar*) v + i*bs2; 292 } else if((!roworiented) && (m == 1)) { 293 barray = (MatScalar*) v + j*bs2; 294 } else { /* Here a copy is required */ 295 if (roworiented) { 296 value = v + i*(stepval+bs)*bs + j*bs; 297 } else { 298 value = v + j*(stepval+bs)*bs + i*bs; 299 } 300 for (ii=0; ii<bs; ii++,value+=stepval) { 301 for (jj=0; jj<bs; jj++) { 302 *barray++ = *value++; 303 } 304 } 305 barray -=bs2; 306 } 307 308 if (in[j] >= cstart && in[j] < cend){ 309 col = in[j] - cstart; 310 ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 311 } 312 else if (in[j] < 0) continue; 313 #if defined(PETSC_USE_DEBUG) 314 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 315 #endif 316 else { 317 if (mat->was_assembled) { 318 if (!baij->colmap) { 319 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 320 } 321 322 #if defined(PETSC_USE_DEBUG) 323 #if defined (PETSC_USE_CTABLE) 324 { PetscInt data; 325 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 326 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 327 } 328 #else 329 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 330 #endif 331 #endif 332 #if defined (PETSC_USE_CTABLE) 333 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 334 col = (col - 1)/bs; 335 #else 336 col = (baij->colmap[in[j]] - 1)/bs; 337 #endif 338 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 339 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 340 col = in[j]; 341 } 342 } 343 else col = in[j]; 344 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 345 } 346 } 347 } else { 348 if (!baij->donotstash) { 349 if (roworiented) { 350 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 351 } else { 352 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 353 } 354 } 355 } 356 } 357 PetscFunctionReturn(0); 358 } 359 360 #undef __FUNCT__ 361 #define __FUNCT__ "MatGetValues_MPISBAIJ" 362 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 363 { 364 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 365 PetscErrorCode ierr; 366 PetscInt bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend; 367 PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data; 368 369 PetscFunctionBegin; 370 for (i=0; i<m; i++) { 371 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */ 372 if (idxm[i] >= mat->rmap->N) 
SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 373 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 374 row = idxm[i] - bsrstart; 375 for (j=0; j<n; j++) { 376 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */ 377 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 378 if (idxn[j] >= bscstart && idxn[j] < bscend){ 379 col = idxn[j] - bscstart; 380 ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 381 } else { 382 if (!baij->colmap) { 383 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 384 } 385 #if defined (PETSC_USE_CTABLE) 386 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 387 data --; 388 #else 389 data = baij->colmap[idxn[j]/bs]-1; 390 #endif 391 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 392 else { 393 col = data + idxn[j]%bs; 394 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 395 } 396 } 397 } 398 } else { 399 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 400 } 401 } 402 PetscFunctionReturn(0); 403 } 404 405 #undef __FUNCT__ 406 #define __FUNCT__ "MatNorm_MPISBAIJ" 407 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm) 408 { 409 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 410 PetscErrorCode ierr; 411 PetscReal sum[2],*lnorm2; 412 413 PetscFunctionBegin; 414 if (baij->size == 1) { 415 ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr); 416 } else { 417 if (type == NORM_FROBENIUS) { 418 ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr); 419 ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr); 420 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++; /* squar power of norm(A) */ 421 ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr); 422 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--; /* squar power of norm(B) */ 423 ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 424 *norm = sqrt(sum[0] + 2*sum[1]); 425 ierr = PetscFree(lnorm2);CHKERRQ(ierr); 426 } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */ 427 Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data; 428 Mat_SeqBAIJ *bmat=(Mat_SeqBAIJ*)baij->B->data; 429 PetscReal *rsum,*rsum2,vabs; 430 PetscInt *jj,*garray=baij->garray,rstart=baij->rstartbs,nz; 431 PetscInt brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs; 432 MatScalar *v; 433 434 ierr = PetscMalloc2(mat->cmap->N,PetscReal,&rsum,mat->cmap->N,PetscReal,&rsum2);CHKERRQ(ierr); 435 ierr = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 436 /* Amat */ 437 v = amat->a; jj = amat->j; 438 for (brow=0; brow<mbs; brow++) { 439 grow = bs*(rstart + brow); 440 nz = amat->i[brow+1] - amat->i[brow]; 441 for (bcol=0; bcol<nz; bcol++){ 442 gcol = bs*(rstart + *jj); jj++; 443 for (col=0; col<bs; col++){ 444 for (row=0; row<bs; row++){ 445 vabs = PetscAbsScalar(*v); v++; 446 rsum[gcol+col] += vabs; 447 /* non-diagonal block */ 448 if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs; 449 } 450 } 451 } 452 } 453 /* Bmat */ 454 v = bmat->a; jj = bmat->j; 455 for (brow=0; brow<mbs; brow++) { 456 grow = bs*(rstart + brow); 457 nz = bmat->i[brow+1] - bmat->i[brow]; 458 for (bcol=0; bcol<nz; bcol++){ 459 gcol = bs*garray[*jj]; jj++; 460 for (col=0; col<bs; col++){ 461 for (row=0; row<bs; row++){ 462 vabs = PetscAbsScalar(*v); v++; 463 rsum[gcol+col] += 
vabs; 464 rsum[grow+row] += vabs; 465 } 466 } 467 } 468 } 469 ierr = MPI_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 470 *norm = 0.0; 471 for (col=0; col<mat->cmap->N; col++) { 472 if (rsum2[col] > *norm) *norm = rsum2[col]; 473 } 474 ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr); 475 } else { 476 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet"); 477 } 478 } 479 PetscFunctionReturn(0); 480 } 481 482 #undef __FUNCT__ 483 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ" 484 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode) 485 { 486 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 487 PetscErrorCode ierr; 488 PetscInt nstash,reallocs; 489 InsertMode addv; 490 491 PetscFunctionBegin; 492 if (baij->donotstash) { 493 PetscFunctionReturn(0); 494 } 495 496 /* make sure all processors are either in INSERTMODE or ADDMODE */ 497 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr); 498 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 499 mat->insertmode = addv; /* in case this processor had no cache */ 500 501 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 502 ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr); 503 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 504 ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 505 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 506 ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 507 PetscFunctionReturn(0); 508 } 509 510 #undef __FUNCT__ 511 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ" 512 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode) 513 { 514 Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data; 515 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)baij->A->data; 516 PetscErrorCode ierr; 517 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 518 PetscInt *row,*col; 519 PetscTruth other_disassembled; 520 PetscMPIInt n; 521 PetscTruth r1,r2,r3; 522 MatScalar *val; 523 InsertMode addv = mat->insertmode; 524 525 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 526 PetscFunctionBegin; 527 528 if (!baij->donotstash) { 529 while (1) { 530 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 531 if (!flg) break; 532 533 for (i=0; i<n;) { 534 /* Now identify the consecutive vals belonging to the same row */ 535 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 536 if (j < n) ncols = j-i; 537 else ncols = n-i; 538 /* Now assemble all these values with a single function call */ 539 ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 540 i = j; 541 } 542 } 543 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 544 /* Now process the block-stash. 
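       These are the blocked entries that were set locally but belong to block rows owned by other processes.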
       Since the values are stashed column-oriented, temporarily set the roworiented
       flags to PETSC_FALSE, and restore the original flags after MatSetValuesBlocked()
       has been called. */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
  }
  ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  baij->rowvalues = 0;

  PetscFunctionReturn(0);
}

extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
#undef __FUNCT__
#define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscInt          bs = mat->rmap->bs;
  PetscMPIInt       size = baij->size,rank = baij->rank;
  PetscTruth        iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
              rank,mat->rmap->N,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr); 626 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 627 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr); 628 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 629 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr); 630 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 631 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 632 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 633 PetscFunctionReturn(0); 634 } else if (format == PETSC_VIEWER_ASCII_INFO) { 635 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 636 PetscFunctionReturn(0); 637 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 638 PetscFunctionReturn(0); 639 } 640 } 641 642 if (isdraw) { 643 PetscDraw draw; 644 PetscTruth isnull; 645 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 646 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 647 } 648 649 if (size == 1) { 650 ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 651 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 652 } else { 653 /* assemble the entire matrix onto first processor. */ 654 Mat A; 655 Mat_SeqSBAIJ *Aloc; 656 Mat_SeqBAIJ *Bloc; 657 PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 658 MatScalar *a; 659 660 /* Should this be the same type as mat? */ 661 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 662 if (!rank) { 663 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 664 } else { 665 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 666 } 667 ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 668 ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 669 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 670 671 /* copy over the A part */ 672 Aloc = (Mat_SeqSBAIJ*)baij->A->data; 673 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 674 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 675 676 for (i=0; i<mbs; i++) { 677 rvals[0] = bs*(baij->rstartbs + i); 678 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 679 for (j=ai[i]; j<ai[i+1]; j++) { 680 col = (baij->cstartbs+aj[j])*bs; 681 for (k=0; k<bs; k++) { 682 ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 683 col++; a += bs; 684 } 685 } 686 } 687 /* copy over the B part */ 688 Bloc = (Mat_SeqBAIJ*)baij->B->data; 689 ai = Bloc->i; aj = Bloc->j; a = Bloc->a; 690 for (i=0; i<mbs; i++) { 691 692 rvals[0] = bs*(baij->rstartbs + i); 693 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 694 for (j=ai[i]; j<ai[i+1]; j++) { 695 col = baij->garray[aj[j]]*bs; 696 for (k=0; k<bs; k++) { 697 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 698 col++; a += bs; 699 } 700 } 701 } 702 ierr = PetscFree(rvals);CHKERRQ(ierr); 703 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 704 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 705 /* 706 Everyone has to call to draw the matrix since the graphics waits are 707 synchronized across all processors that share the PetscDraw object 708 */ 709 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 710 if (!rank) { 711 ierr = 
PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 712 ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 713 } 714 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 715 ierr = MatDestroy(A);CHKERRQ(ierr); 716 } 717 PetscFunctionReturn(0); 718 } 719 720 #undef __FUNCT__ 721 #define __FUNCT__ "MatView_MPISBAIJ" 722 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer) 723 { 724 PetscErrorCode ierr; 725 PetscTruth iascii,isdraw,issocket,isbinary; 726 727 PetscFunctionBegin; 728 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 729 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 730 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 731 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 732 if (iascii || isdraw || issocket || isbinary) { 733 ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 734 } else { 735 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name); 736 } 737 PetscFunctionReturn(0); 738 } 739 740 #undef __FUNCT__ 741 #define __FUNCT__ "MatDestroy_MPISBAIJ" 742 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat) 743 { 744 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 745 PetscErrorCode ierr; 746 747 PetscFunctionBegin; 748 #if defined(PETSC_USE_LOG) 749 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N); 750 #endif 751 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 752 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 753 ierr = MatDestroy(baij->A);CHKERRQ(ierr); 754 ierr = MatDestroy(baij->B);CHKERRQ(ierr); 755 #if defined (PETSC_USE_CTABLE) 756 if (baij->colmap) {ierr = PetscTableDestroy(baij->colmap);CHKERRQ(ierr);} 757 #else 758 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 759 #endif 760 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 761 if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 762 if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 763 if (baij->slvec0) { 764 ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr); 765 ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr); 766 } 767 if (baij->slvec1) { 768 ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr); 769 ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr); 770 ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr); 771 } 772 if (baij->sMvctx) {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);} 773 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 774 ierr = PetscFree(baij->barray);CHKERRQ(ierr); 775 ierr = PetscFree(baij->hd);CHKERRQ(ierr); 776 if (baij->diag) {ierr = VecDestroy(baij->diag);CHKERRQ(ierr);} 777 if (baij->bb1) {ierr = VecDestroy(baij->bb1);CHKERRQ(ierr);} 778 if (baij->xx1) {ierr = VecDestroy(baij->xx1);CHKERRQ(ierr);} 779 #if defined(PETSC_USE_SCALAR_MAT_SINGLE) 780 ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr); 781 #endif 782 ierr = PetscFree(baij->in_loc);CHKERRQ(ierr); 783 ierr = PetscFree(baij->v_loc);CHKERRQ(ierr); 784 ierr = PetscFree(baij->rangebs);CHKERRQ(ierr); 785 ierr = PetscFree(baij);CHKERRQ(ierr); 786 787 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 788 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 789 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 790 ierr 
= PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 791 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 792 PetscFunctionReturn(0); 793 } 794 795 #undef __FUNCT__ 796 #define __FUNCT__ "MatMult_MPISBAIJ_Hermitian" 797 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy) 798 { 799 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 800 PetscErrorCode ierr; 801 PetscInt nt,mbs=a->mbs,bs=A->rmap->bs; 802 PetscScalar *x,*from; 803 804 PetscFunctionBegin; 805 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 806 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 807 808 /* diagonal part */ 809 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 810 ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr); 811 812 /* subdiagonal part */ 813 ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 814 815 /* copy x into the vec slvec0 */ 816 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 817 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 818 819 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 820 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 821 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 822 823 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 824 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 825 /* supperdiagonal part */ 826 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 827 PetscFunctionReturn(0); 828 } 829 830 #undef __FUNCT__ 831 #define __FUNCT__ "MatMult_MPISBAIJ" 832 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy) 833 { 834 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 835 PetscErrorCode ierr; 836 PetscInt nt,mbs=a->mbs,bs=A->rmap->bs; 837 PetscScalar *x,*from; 838 839 PetscFunctionBegin; 840 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 841 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 842 843 /* diagonal part */ 844 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 845 ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr); 846 847 /* subdiagonal part */ 848 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 849 850 /* copy x into the vec slvec0 */ 851 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 852 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 853 854 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 855 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 856 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 857 858 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 859 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 860 /* supperdiagonal part */ 861 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 862 PetscFunctionReturn(0); 863 } 864 865 #undef __FUNCT__ 866 #define __FUNCT__ "MatMult_MPISBAIJ_2comm" 867 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy) 868 { 869 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 870 PetscErrorCode ierr; 871 PetscInt nt; 872 873 PetscFunctionBegin; 874 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 875 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 876 877 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 878 if (nt != A->rmap->N) 
SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 879 880 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 881 /* do diagonal part */ 882 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 883 /* do supperdiagonal part */ 884 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 885 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 886 /* do subdiagonal part */ 887 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 888 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 889 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 890 891 PetscFunctionReturn(0); 892 } 893 894 #undef __FUNCT__ 895 #define __FUNCT__ "MatMultAdd_MPISBAIJ" 896 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 897 { 898 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 899 PetscErrorCode ierr; 900 PetscInt mbs=a->mbs,bs=A->rmap->bs; 901 PetscScalar *x,*from,zero=0.0; 902 903 PetscFunctionBegin; 904 /* 905 PetscSynchronizedPrintf(((PetscObject)A)->comm," MatMultAdd is called ...\n"); 906 PetscSynchronizedFlush(((PetscObject)A)->comm); 907 */ 908 /* diagonal part */ 909 ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr); 910 ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr); 911 912 /* subdiagonal part */ 913 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 914 915 /* copy x into the vec slvec0 */ 916 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 917 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 918 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 919 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 920 921 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 922 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 923 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 924 925 /* supperdiagonal part */ 926 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr); 927 928 PetscFunctionReturn(0); 929 } 930 931 #undef __FUNCT__ 932 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm" 933 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz) 934 { 935 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 936 PetscErrorCode ierr; 937 938 PetscFunctionBegin; 939 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 940 /* do diagonal part */ 941 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 942 /* do supperdiagonal part */ 943 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 944 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 945 946 /* do subdiagonal part */ 947 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 948 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 949 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 950 951 PetscFunctionReturn(0); 952 } 953 954 /* 955 This only works correctly for square matrices where the subblock A->A is the 956 diagonal block 957 */ 958 #undef __FUNCT__ 959 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ" 960 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v) 961 { 962 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 963 PetscErrorCode ierr; 964 965 PetscFunctionBegin; 966 /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square 
matrix where A->A is diag block"); */ 967 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 968 PetscFunctionReturn(0); 969 } 970 971 #undef __FUNCT__ 972 #define __FUNCT__ "MatScale_MPISBAIJ" 973 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa) 974 { 975 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 976 PetscErrorCode ierr; 977 978 PetscFunctionBegin; 979 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 980 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 981 PetscFunctionReturn(0); 982 } 983 984 #undef __FUNCT__ 985 #define __FUNCT__ "MatGetRow_MPISBAIJ" 986 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 987 { 988 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 989 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 990 PetscErrorCode ierr; 991 PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 992 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend; 993 PetscInt *cmap,*idx_p,cstart = mat->rstartbs; 994 995 PetscFunctionBegin; 996 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 997 mat->getrowactive = PETSC_TRUE; 998 999 if (!mat->rowvalues && (idx || v)) { 1000 /* 1001 allocate enough space to hold information from the longest row. 1002 */ 1003 Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data; 1004 Mat_SeqBAIJ *Ba = (Mat_SeqBAIJ*)mat->B->data; 1005 PetscInt max = 1,mbs = mat->mbs,tmp; 1006 for (i=0; i<mbs; i++) { 1007 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */ 1008 if (max < tmp) { max = tmp; } 1009 } 1010 ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1011 } 1012 1013 if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows"); 1014 lrow = row - brstart; /* local row index */ 1015 1016 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1017 if (!v) {pvA = 0; pvB = 0;} 1018 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1019 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1020 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1021 nztot = nzA + nzB; 1022 1023 cmap = mat->garray; 1024 if (v || idx) { 1025 if (nztot) { 1026 /* Sort by increasing column numbers, assuming A and B already sorted */ 1027 PetscInt imark = -1; 1028 if (v) { 1029 *v = v_p = mat->rowvalues; 1030 for (i=0; i<nzB; i++) { 1031 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1032 else break; 1033 } 1034 imark = i; 1035 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1036 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1037 } 1038 if (idx) { 1039 *idx = idx_p = mat->rowindices; 1040 if (imark > -1) { 1041 for (i=0; i<imark; i++) { 1042 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1043 } 1044 } else { 1045 for (i=0; i<nzB; i++) { 1046 if (cmap[cworkB[i]/bs] < cstart) 1047 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1048 else break; 1049 } 1050 imark = i; 1051 } 1052 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1053 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1054 } 1055 } else { 1056 if (idx) *idx = 0; 1057 if (v) *v = 0; 1058 } 1059 } 1060 *nz = nztot; 1061 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1062 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1063 PetscFunctionReturn(0); 1064 } 1065 1066 #undef __FUNCT__ 1067 #define __FUNCT__ "MatRestoreRow_MPISBAIJ" 1068 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat 
mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1069 { 1070 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1071 1072 PetscFunctionBegin; 1073 if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1074 baij->getrowactive = PETSC_FALSE; 1075 PetscFunctionReturn(0); 1076 } 1077 1078 #undef __FUNCT__ 1079 #define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ" 1080 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A) 1081 { 1082 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1083 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1084 1085 PetscFunctionBegin; 1086 aA->getrow_utriangular = PETSC_TRUE; 1087 PetscFunctionReturn(0); 1088 } 1089 #undef __FUNCT__ 1090 #define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ" 1091 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A) 1092 { 1093 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1094 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1095 1096 PetscFunctionBegin; 1097 aA->getrow_utriangular = PETSC_FALSE; 1098 PetscFunctionReturn(0); 1099 } 1100 1101 #undef __FUNCT__ 1102 #define __FUNCT__ "MatRealPart_MPISBAIJ" 1103 PetscErrorCode MatRealPart_MPISBAIJ(Mat A) 1104 { 1105 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1106 PetscErrorCode ierr; 1107 1108 PetscFunctionBegin; 1109 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1110 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1111 PetscFunctionReturn(0); 1112 } 1113 1114 #undef __FUNCT__ 1115 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ" 1116 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A) 1117 { 1118 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1119 PetscErrorCode ierr; 1120 1121 PetscFunctionBegin; 1122 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1123 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1124 PetscFunctionReturn(0); 1125 } 1126 1127 #undef __FUNCT__ 1128 #define __FUNCT__ "MatZeroEntries_MPISBAIJ" 1129 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A) 1130 { 1131 Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data; 1132 PetscErrorCode ierr; 1133 1134 PetscFunctionBegin; 1135 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1136 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1137 PetscFunctionReturn(0); 1138 } 1139 1140 #undef __FUNCT__ 1141 #define __FUNCT__ "MatGetInfo_MPISBAIJ" 1142 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1143 { 1144 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data; 1145 Mat A = a->A,B = a->B; 1146 PetscErrorCode ierr; 1147 PetscReal isend[5],irecv[5]; 1148 1149 PetscFunctionBegin; 1150 info->block_size = (PetscReal)matin->rmap->bs; 1151 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1152 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1153 isend[3] = info->memory; isend[4] = info->mallocs; 1154 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1155 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1156 isend[3] += info->memory; isend[4] += info->mallocs; 1157 if (flag == MAT_LOCAL) { 1158 info->nz_used = isend[0]; 1159 info->nz_allocated = isend[1]; 1160 info->nz_unneeded = isend[2]; 1161 info->memory = isend[3]; 1162 info->mallocs = isend[4]; 1163 } else if (flag == MAT_GLOBAL_MAX) { 1164 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1165 info->nz_used = irecv[0]; 1166 info->nz_allocated = irecv[1]; 1167 info->nz_unneeded = irecv[2]; 1168 info->memory = irecv[3]; 1169 info->mallocs = irecv[4]; 1170 } else if (flag == MAT_GLOBAL_SUM) { 1171 ierr = 
MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else {
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPISBAIJ"
PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscTruth flg)
{
  Mat_MPISBAIJ   *a  = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = flg;
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    a->ht_flag = flg;
    break;
  case MAT_HERMITIAN:
    if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first");
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    A->ops->mult = MatMult_MPISBAIJ_Hermitian;
    break;
  case MAT_SPD:
    A->spd_set = PETSC_TRUE;
    A->spd     = flg;
    if (flg) {
      A->symmetric                  = PETSC_TRUE;
      A->structurally_symmetric     = PETSC_TRUE;
      A->symmetric_set              = PETSC_TRUE;
      A->structurally_symmetric_set = PETSC_TRUE;
    }
    break;
  case MAT_SYMMETRIC:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_STRUCTURALLY_SYMMETRIC:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_SYMMETRY_ETERNAL:
    if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_LOWER_TRIANGULAR:
    aA->ignore_ltriangular = flg;
    break;
  case MAT_ERROR_LOWER_TRIANGULAR:
    aA->ignore_ltriangular = flg;
    break;
  case MAT_GETROW_UPPERTRIANGULAR:
    aA->getrow_utriangular = flg;
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPISBAIJ"
PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* the matrix is symmetric, so its transpose is simply a copy of itself */
  if (reuse == MAT_INITIAL_MATRIX || *B != A) {
    ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  Mat            a=baij->A, b=baij->B;
  PetscErrorCode ierr;
  PetscInt       nv,m,n;
PetscTruth flg; 1279 1280 PetscFunctionBegin; 1281 if (ll != rr){ 1282 ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr); 1283 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n"); 1284 } 1285 if (!ll) PetscFunctionReturn(0); 1286 1287 ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr); 1288 if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n); 1289 1290 ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr); 1291 if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size"); 1292 1293 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1294 1295 /* left diagonalscale the off-diagonal part */ 1296 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1297 1298 /* scale the diagonal part */ 1299 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1300 1301 /* right diagonalscale the off-diagonal part */ 1302 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1303 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1304 PetscFunctionReturn(0); 1305 } 1306 1307 #undef __FUNCT__ 1308 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ" 1309 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A) 1310 { 1311 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1312 PetscErrorCode ierr; 1313 1314 PetscFunctionBegin; 1315 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1316 PetscFunctionReturn(0); 1317 } 1318 1319 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *); 1320 1321 #undef __FUNCT__ 1322 #define __FUNCT__ "MatEqual_MPISBAIJ" 1323 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag) 1324 { 1325 Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data; 1326 Mat a,b,c,d; 1327 PetscTruth flg; 1328 PetscErrorCode ierr; 1329 1330 PetscFunctionBegin; 1331 a = matA->A; b = matA->B; 1332 c = matB->A; d = matB->B; 1333 1334 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1335 if (flg) { 1336 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1337 } 1338 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1339 PetscFunctionReturn(0); 1340 } 1341 1342 #undef __FUNCT__ 1343 #define __FUNCT__ "MatCopy_MPISBAIJ" 1344 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str) 1345 { 1346 PetscErrorCode ierr; 1347 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1348 Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data; 1349 1350 PetscFunctionBegin; 1351 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
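     The fallback MatCopy_Basic() copies row by row with MatGetRow(), which for SBAIJ requires
     MatGetRowUpperTriangular() to have been called first; hence the bracketing calls below.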
*/ 1352 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1353 ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr); 1354 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1355 ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr); 1356 } else { 1357 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1358 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1359 } 1360 PetscFunctionReturn(0); 1361 } 1362 1363 #undef __FUNCT__ 1364 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ" 1365 PetscErrorCode MatSetUpPreallocation_MPISBAIJ(Mat A) 1366 { 1367 PetscErrorCode ierr; 1368 1369 PetscFunctionBegin; 1370 ierr = MatMPISBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1371 PetscFunctionReturn(0); 1372 } 1373 1374 #undef __FUNCT__ 1375 #define __FUNCT__ "MatAXPY_MPISBAIJ" 1376 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1377 { 1378 PetscErrorCode ierr; 1379 Mat_MPISBAIJ *xx=(Mat_MPISBAIJ *)X->data,*yy=(Mat_MPISBAIJ *)Y->data; 1380 PetscBLASInt bnz,one=1; 1381 Mat_SeqSBAIJ *xa,*ya; 1382 Mat_SeqBAIJ *xb,*yb; 1383 1384 PetscFunctionBegin; 1385 if (str == SAME_NONZERO_PATTERN) { 1386 PetscScalar alpha = a; 1387 xa = (Mat_SeqSBAIJ *)xx->A->data; 1388 ya = (Mat_SeqSBAIJ *)yy->A->data; 1389 bnz = PetscBLASIntCast(xa->nz); 1390 BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one); 1391 xb = (Mat_SeqBAIJ *)xx->B->data; 1392 yb = (Mat_SeqBAIJ *)yy->B->data; 1393 bnz = PetscBLASIntCast(xb->nz); 1394 BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one); 1395 } else { 1396 ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr); 1397 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1398 ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr); 1399 } 1400 PetscFunctionReturn(0); 1401 } 1402 1403 #undef __FUNCT__ 1404 #define __FUNCT__ "MatSetBlockSize_MPISBAIJ" 1405 PetscErrorCode MatSetBlockSize_MPISBAIJ(Mat A,PetscInt bs) 1406 { 1407 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1408 PetscInt rbs,cbs; 1409 PetscErrorCode ierr; 1410 1411 PetscFunctionBegin; 1412 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1413 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1414 ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr); 1415 ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr); 1416 if (rbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with SBAIJ %d",bs,rbs); 1417 if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with SBAIJ %d",bs,cbs); 1418 PetscFunctionReturn(0); 1419 } 1420 1421 #undef __FUNCT__ 1422 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ" 1423 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[]) 1424 { 1425 PetscErrorCode ierr; 1426 PetscInt i; 1427 PetscTruth flg; 1428 1429 PetscFunctionBegin; 1430 for (i=0; i<n; i++) { 1431 ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr); 1432 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices"); 1433 } 1434 ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr); 1435 PetscFunctionReturn(0); 1436 } 1437 1438 1439 /* -------------------------------------------------------------------*/ 1440 static struct _MatOps MatOps_Values = { 1441 MatSetValues_MPISBAIJ, 1442 MatGetRow_MPISBAIJ, 1443 MatRestoreRow_MPISBAIJ, 1444 MatMult_MPISBAIJ, 1445 /* 4*/ MatMultAdd_MPISBAIJ, 1446 MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */ 1447 MatMultAdd_MPISBAIJ, 1448 0, 1449 0, 1450 0, 1451 /*10*/ 0, 1452 
0, 1453 0, 1454 MatSOR_MPISBAIJ, 1455 MatTranspose_MPISBAIJ, 1456 /*15*/ MatGetInfo_MPISBAIJ, 1457 MatEqual_MPISBAIJ, 1458 MatGetDiagonal_MPISBAIJ, 1459 MatDiagonalScale_MPISBAIJ, 1460 MatNorm_MPISBAIJ, 1461 /*20*/ MatAssemblyBegin_MPISBAIJ, 1462 MatAssemblyEnd_MPISBAIJ, 1463 MatSetOption_MPISBAIJ, 1464 MatZeroEntries_MPISBAIJ, 1465 /*24*/ 0, 1466 0, 1467 0, 1468 0, 1469 0, 1470 /*29*/ MatSetUpPreallocation_MPISBAIJ, 1471 0, 1472 0, 1473 0, 1474 0, 1475 /*34*/ MatDuplicate_MPISBAIJ, 1476 0, 1477 0, 1478 0, 1479 0, 1480 /*39*/ MatAXPY_MPISBAIJ, 1481 MatGetSubMatrices_MPISBAIJ, 1482 MatIncreaseOverlap_MPISBAIJ, 1483 MatGetValues_MPISBAIJ, 1484 MatCopy_MPISBAIJ, 1485 /*44*/ 0, 1486 MatScale_MPISBAIJ, 1487 0, 1488 0, 1489 0, 1490 /*49*/ MatSetBlockSize_MPISBAIJ, 1491 0, 1492 0, 1493 0, 1494 0, 1495 /*54*/ 0, 1496 0, 1497 MatSetUnfactored_MPISBAIJ, 1498 0, 1499 MatSetValuesBlocked_MPISBAIJ, 1500 /*59*/ 0, 1501 0, 1502 0, 1503 0, 1504 0, 1505 /*64*/ 0, 1506 0, 1507 0, 1508 0, 1509 0, 1510 /*69*/ MatGetRowMaxAbs_MPISBAIJ, 1511 0, 1512 0, 1513 0, 1514 0, 1515 /*74*/ 0, 1516 0, 1517 0, 1518 0, 1519 0, 1520 /*79*/ 0, 1521 0, 1522 0, 1523 0, 1524 MatLoad_MPISBAIJ, 1525 /*84*/ 0, 1526 0, 1527 0, 1528 0, 1529 0, 1530 /*89*/ 0, 1531 0, 1532 0, 1533 0, 1534 0, 1535 /*94*/ 0, 1536 0, 1537 0, 1538 0, 1539 0, 1540 /*99*/ 0, 1541 0, 1542 0, 1543 0, 1544 0, 1545 /*104*/0, 1546 MatRealPart_MPISBAIJ, 1547 MatImaginaryPart_MPISBAIJ, 1548 MatGetRowUpperTriangular_MPISBAIJ, 1549 MatRestoreRowUpperTriangular_MPISBAIJ, 1550 /*109*/0, 1551 0, 1552 0, 1553 0, 1554 0, 1555 /*114*/0, 1556 0, 1557 0, 1558 0, 1559 0, 1560 /*119*/0, 1561 0, 1562 0, 1563 0 1564 }; 1565 1566 1567 EXTERN_C_BEGIN 1568 #undef __FUNCT__ 1569 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ" 1570 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 1571 { 1572 PetscFunctionBegin; 1573 *a = ((Mat_MPISBAIJ *)A->data)->A; 1574 *iscopy = PETSC_FALSE; 1575 PetscFunctionReturn(0); 1576 } 1577 EXTERN_C_END 1578 1579 EXTERN_C_BEGIN 1580 #undef __FUNCT__ 1581 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ" 1582 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz) 1583 { 1584 Mat_MPISBAIJ *b; 1585 PetscErrorCode ierr; 1586 PetscInt i,mbs,Mbs,newbs = PetscAbs(bs); 1587 1588 PetscFunctionBegin; 1589 if (bs < 0){ 1590 ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPISBAIJ matrix","Mat");CHKERRQ(ierr); 1591 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr); 1592 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1593 bs = PetscAbs(bs); 1594 } 1595 if ((d_nnz || o_nnz) && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz"); 1596 bs = newbs; 1597 1598 if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3; 1599 if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1; 1600 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 1601 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 1602 1603 B->rmap->bs = B->cmap->bs = bs; 1604 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 1605 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 1606 1607 if (d_nnz) { 1608 for (i=0; 
i<B->rmap->n/bs; i++) { 1609 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 1610 } 1611 } 1612 if (o_nnz) { 1613 for (i=0; i<B->rmap->n/bs; i++) { 1614 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 1615 } 1616 } 1617 1618 b = (Mat_MPISBAIJ*)B->data; 1619 mbs = B->rmap->n/bs; 1620 Mbs = B->rmap->N/bs; 1621 if (mbs*bs != B->rmap->n) { 1622 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number of local rows %D must be divisible by blocksize %D",B->rmap->n,bs); 1623 } 1624 1625 B->rmap->bs = bs; 1626 b->bs2 = bs*bs; 1627 b->mbs = mbs; 1628 b->nbs = mbs; 1629 b->Mbs = Mbs; 1630 b->Nbs = Mbs; 1631 1632 for (i=0; i<=b->size; i++) { 1633 b->rangebs[i] = B->rmap->range[i]/bs; 1634 } 1635 b->rstartbs = B->rmap->rstart/bs; 1636 b->rendbs = B->rmap->rend/bs; 1637 1638 b->cstartbs = B->cmap->rstart/bs; 1639 b->cendbs = B->cmap->rend/bs; 1640 1641 if (!B->preallocated) { 1642 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 1643 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 1644 ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr); 1645 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 1646 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 1647 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 1648 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 1649 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 1650 ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr); 1651 } 1652 1653 ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 1654 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 1655 B->preallocated = PETSC_TRUE; 1656 PetscFunctionReturn(0); 1657 } 1658 EXTERN_C_END 1659 1660 EXTERN_C_BEGIN 1661 #if defined(PETSC_HAVE_MUMPS) 1662 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_sbaij_mumps(Mat,MatFactorType,Mat*); 1663 #endif 1664 #if defined(PETSC_HAVE_SPOOLES) 1665 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_mpisbaij_spooles(Mat,MatFactorType,Mat*); 1666 #endif 1667 #if defined(PETSC_HAVE_PASTIX) 1668 extern PetscErrorCode MatGetFactor_mpisbaij_pastix(Mat,MatFactorType,Mat*); 1669 #endif 1670 EXTERN_C_END 1671 1672 /*MC 1673 MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices, 1674 based on block compressed sparse row format. Only the upper triangular portion of the matrix is stored. 1675 1676 Options Database Keys: 1677 .
-mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions() 1678 1679 Level: beginner 1680 1681 .seealso: MatCreateMPISBAIJ 1682 M*/ 1683 1684 EXTERN_C_BEGIN 1685 #undef __FUNCT__ 1686 #define __FUNCT__ "MatCreate_MPISBAIJ" 1687 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPISBAIJ(Mat B) 1688 { 1689 Mat_MPISBAIJ *b; 1690 PetscErrorCode ierr; 1691 PetscTruth flg; 1692 1693 PetscFunctionBegin; 1694 1695 ierr = PetscNewLog(B,Mat_MPISBAIJ,&b);CHKERRQ(ierr); 1696 B->data = (void*)b; 1697 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1698 1699 B->ops->destroy = MatDestroy_MPISBAIJ; 1700 B->ops->view = MatView_MPISBAIJ; 1701 B->mapping = 0; 1702 B->assembled = PETSC_FALSE; 1703 1704 B->insertmode = NOT_SET_VALUES; 1705 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 1706 ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr); 1707 1708 /* build local table of row and column ownerships */ 1709 ierr = PetscMalloc((b->size+2)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr); 1710 1711 /* build cache for off array entries formed */ 1712 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 1713 b->donotstash = PETSC_FALSE; 1714 b->colmap = PETSC_NULL; 1715 b->garray = PETSC_NULL; 1716 b->roworiented = PETSC_TRUE; 1717 1718 /* stuff used in block assembly */ 1719 b->barray = 0; 1720 1721 /* stuff used for matrix vector multiply */ 1722 b->lvec = 0; 1723 b->Mvctx = 0; 1724 b->slvec0 = 0; 1725 b->slvec0b = 0; 1726 b->slvec1 = 0; 1727 b->slvec1a = 0; 1728 b->slvec1b = 0; 1729 b->sMvctx = 0; 1730 1731 /* stuff for MatGetRow() */ 1732 b->rowindices = 0; 1733 b->rowvalues = 0; 1734 b->getrowactive = PETSC_FALSE; 1735 1736 /* hash table stuff */ 1737 b->ht = 0; 1738 b->hd = 0; 1739 b->ht_size = 0; 1740 b->ht_flag = PETSC_FALSE; 1741 b->ht_fact = 0; 1742 b->ht_total_ct = 0; 1743 b->ht_insert_ct = 0; 1744 1745 b->in_loc = 0; 1746 b->v_loc = 0; 1747 b->n_loc = 0; 1748 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr); 1749 ierr = PetscOptionsTruth("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 1750 if (flg) { 1751 PetscReal fact = 1.39; 1752 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 1753 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr); 1754 if (fact <= 1.0) fact = 1.39; 1755 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 1756 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 1757 } 1758 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1759 1760 #if defined(PETSC_HAVE_PASTIX) 1761 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C", 1762 "MatGetFactor_mpisbaij_pastix", 1763 MatGetFactor_mpisbaij_pastix);CHKERRQ(ierr); 1764 #endif 1765 #if defined(PETSC_HAVE_MUMPS) 1766 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", 1767 "MatGetFactor_sbaij_mumps", 1768 MatGetFactor_sbaij_mumps);CHKERRQ(ierr); 1769 #endif 1770 #if defined(PETSC_HAVE_SPOOLES) 1771 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C", 1772 "MatGetFactor_mpisbaij_spooles", 1773 MatGetFactor_mpisbaij_spooles);CHKERRQ(ierr); 1774 #endif 1775 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1776 "MatStoreValues_MPISBAIJ", 
1777 MatStoreValues_MPISBAIJ);CHKERRQ(ierr); 1778 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1779 "MatRetrieveValues_MPISBAIJ", 1780 MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr); 1781 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 1782 "MatGetDiagonalBlock_MPISBAIJ", 1783 MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr); 1784 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C", 1785 "MatMPISBAIJSetPreallocation_MPISBAIJ", 1786 MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr); 1787 B->symmetric = PETSC_TRUE; 1788 B->structurally_symmetric = PETSC_TRUE; 1789 B->symmetric_set = PETSC_TRUE; 1790 B->structurally_symmetric_set = PETSC_TRUE; 1791 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr); 1792 PetscFunctionReturn(0); 1793 } 1794 EXTERN_C_END 1795 1796 /*MC 1797 MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices. 1798 1799 This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator, 1800 and MATMPISBAIJ otherwise. 1801 1802 Options Database Keys: 1803 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions() 1804 1805 Level: beginner 1806 1807 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ 1808 M*/ 1809 1810 EXTERN_C_BEGIN 1811 #undef __FUNCT__ 1812 #define __FUNCT__ "MatCreate_SBAIJ" 1813 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SBAIJ(Mat A) 1814 { 1815 PetscErrorCode ierr; 1816 PetscMPIInt size; 1817 1818 PetscFunctionBegin; 1819 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 1820 if (size == 1) { 1821 ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr); 1822 } else { 1823 ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 1824 } 1825 PetscFunctionReturn(0); 1826 } 1827 EXTERN_C_END 1828 1829 #undef __FUNCT__ 1830 #define __FUNCT__ "MatMPISBAIJSetPreallocation" 1831 /*@C 1832 MatMPISBAIJSetPreallocation - For good matrix assembly performance 1833 the user should preallocate the matrix storage by setting the parameters 1834 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1835 performance can be increased by more than a factor of 50. 1836 1837 Collective on Mat 1838 1839 Input Parameters: 1840 + A - the matrix 1841 . bs - size of block 1842 . d_nz - number of block nonzeros per block row in diagonal portion of local 1843 submatrix (same for all local rows) 1844 . d_nnz - array containing the number of block nonzeros in the various block rows 1845 in the upper triangular and diagonal part of the diagonal portion of the local 1846 submatrix (possibly different for each block row) or PETSC_NULL. You must leave room 1847 for the diagonal entry even if it is zero. 1848 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1849 submatrix (same for all local rows). 1850 - o_nnz - array containing the number of block nonzeros in the various block rows of the 1851 off-diagonal portion of the local submatrix (possibly different for 1852 each block row) or PETSC_NULL. 1853 1854 1855 Options Database Keys: 1856 . -mat_no_unroll - uses code that does not unroll the loops in the 1857 block calculations (much slower) 1858 . -mat_block_size - size of the blocks to use 1859 1860 Notes: 1861 1862 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1863 then it must be used on all processors that share the object for that argument.
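   For illustration only (the block size and the per-block-row counts below are made-up
   values for a hypothetical problem with three 2x2 block rows per process; error checking
   is omitted), a typical preallocation sequence is
.vb
      Mat      A;
      PetscInt d_nnz[3] = {2,1,1};
      PetscInt o_nnz[3] = {1,1,0};
      MatCreate(comm,&A);
      MatSetSizes(A,6,6,PETSC_DETERMINE,PETSC_DETERMINE);
      MatSetType(A,MATMPISBAIJ);
      MatMPISBAIJSetPreallocation(A,2,0,d_nnz,0,o_nnz);
.ve
   where d_nnz[i] counts the blocks of block row i that lie in the upper triangular part
   (including the diagonal block) of the diagonal portion, and o_nnz[i] counts the blocks
   in the off-diagonal portion.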
1864 1865 If the *_nnz parameter is given then the *_nz parameter is ignored 1866 1867 Storage Information: 1868 For a square global matrix we define each processor's diagonal portion 1869 to be its local rows and the corresponding columns (a square submatrix); 1870 each processor's off-diagonal portion encompasses the remainder of the 1871 local matrix (a rectangular submatrix). 1872 1873 The user can specify preallocated storage for the diagonal part of 1874 the local submatrix with either d_nz or d_nnz (not both). Set 1875 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1876 memory allocation. Likewise, specify preallocated storage for the 1877 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1878 1879 You can call MatGetInfo() to get information on how effective the preallocation was; 1880 for example, the fields mallocs, nz_allocated, nz_used and nz_unneeded. 1881 You can also run with the option -info and look for messages with the string 1882 malloc in them to see if additional memory allocation was needed. 1883 1884 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1885 the figure below we depict these three local rows and all columns (0-11). 1886 1887 .vb 1888 0 1 2 3 4 5 6 7 8 9 10 11 1889 ------------------- 1890 row 3 | o o o d d d o o o o o o 1891 row 4 | o o o d d d o o o o o o 1892 row 5 | o o o d d d o o o o o o 1893 ------------------- 1894 .ve 1895 1896 Thus, any entries in the d locations are stored in the d (diagonal) 1897 submatrix, and any entries in the o locations are stored in the 1898 o (off-diagonal) submatrix. Note that the d matrix is stored in 1899 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 1900 1901 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 1902 plus the diagonal part of the d matrix, 1903 and o_nz should indicate the number of block nonzeros per row in the o matrix. 1904 In general, for PDE problems in which most nonzeros are near the diagonal, 1905 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1906 or you will get TERRIBLE performance; see the users' manual chapter on 1907 matrices. 1908 1909 Level: intermediate 1910 1911 .keywords: matrix, block, aij, compressed row, sparse, parallel 1912 1913 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1914 @*/ 1915 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 1916 { 1917 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 1918 1919 PetscFunctionBegin; 1920 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 1921 if (f) { 1922 ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 1923 } 1924 PetscFunctionReturn(0); 1925 } 1926 1927 #undef __FUNCT__ 1928 #define __FUNCT__ "MatCreateMPISBAIJ" 1929 /*@C 1930 MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format 1931 (block compressed row). For good matrix assembly performance 1932 the user should preallocate the matrix storage by setting the parameters 1933 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1934 performance can be increased by more than a factor of 50. 1935 1936 Collective on MPI_Comm 1937 1938 Input Parameters: 1939 + comm - MPI communicator 1940 . bs - size of block 1941 .
m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1942 This value should be the same as the local size used in creating the 1943 y vector for the matrix-vector product y = Ax. 1944 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1945 This value should be the same as the local size used in creating the 1946 x vector for the matrix-vector product y = Ax. 1947 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 1948 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 1949 . d_nz - number of block nonzeros per block row in diagonal portion of local 1950 submatrix (same for all local rows) 1951 . d_nnz - array containing the number of block nonzeros in the various block rows 1952 in the upper triangular and diagonal part of the diagonal portion of the local 1953 submatrix (possibly different for each block row) or PETSC_NULL. 1954 You must leave room for the diagonal entry even if it is zero. 1955 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1956 submatrix (same for all local rows). 1957 - o_nnz - array containing the number of block nonzeros in the various block rows of the 1958 off-diagonal portion of the local submatrix (possibly different for 1959 each block row) or PETSC_NULL. 1960 1961 Output Parameter: 1962 . A - the matrix 1963 1964 Options Database Keys: 1965 . -mat_no_unroll - uses code that does not unroll the loops in the 1966 block calculations (much slower) 1967 . -mat_block_size - size of the blocks to use 1968 . -mat_mpi - use the parallel matrix data structures even on one processor 1969 (defaults to using SeqSBAIJ format on one processor) 1970 1971 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 1972 MatXXXXSetPreallocation() paradigm instead of this routine directly. 1973 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 1974 1975 Notes: 1976 The number of rows and columns must be divisible by blocksize. 1977 This matrix type does not support complex Hermitian operations. 1978 1979 The user MUST specify either the local or global matrix dimensions 1980 (possibly both). 1981 1982 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1983 then it must be used on all processors that share the object for that argument. 1984 1985 If the *_nnz parameter is given then the *_nz parameter is ignored 1986 1987 Storage Information: 1988 For a square global matrix we define each processor's diagonal portion 1989 to be its local rows and the corresponding columns (a square submatrix); 1990 each processor's off-diagonal portion encompasses the remainder of the 1991 local matrix (a rectangular submatrix). 1992 1993 The user can specify preallocated storage for the diagonal part of 1994 the local submatrix with either d_nz or d_nnz (not both). Set 1995 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1996 memory allocation. Likewise, specify preallocated storage for the 1997 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1998 1999 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2000 the figure below we depict these three local rows and all columns (0-11).
2001 2002 .vb 2003 0 1 2 3 4 5 6 7 8 9 10 11 2004 ------------------- 2005 row 3 | o o o d d d o o o o o o 2006 row 4 | o o o d d d o o o o o o 2007 row 5 | o o o d d d o o o o o o 2008 ------------------- 2009 .ve 2010 2011 Thus, any entries in the d locations are stored in the d (diagonal) 2012 submatrix, and any entries in the o locations are stored in the 2013 o (off-diagonal) submatrix. Note that the d matrix is stored in 2014 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 2015 2016 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 2017 plus the diagonal part of the d matrix, 2018 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2019 In general, for PDE problems in which most nonzeros are near the diagonal, 2020 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2021 or you will get TERRIBLE performance; see the users' manual chapter on 2022 matrices. 2023 2024 Level: intermediate 2025 2026 .keywords: matrix, block, aij, compressed row, sparse, parallel 2027 2028 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2029 @*/ 2030 2031 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPISBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2032 { 2033 PetscErrorCode ierr; 2034 PetscMPIInt size; 2035 2036 PetscFunctionBegin; 2037 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2038 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2039 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2040 if (size > 1) { 2041 ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr); 2042 ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2043 } else { 2044 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 2045 ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2046 } 2047 PetscFunctionReturn(0); 2048 } 2049 2050 2051 #undef __FUNCT__ 2052 #define __FUNCT__ "MatDuplicate_MPISBAIJ" 2053 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2054 { 2055 Mat mat; 2056 Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data; 2057 PetscErrorCode ierr; 2058 PetscInt len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs; 2059 PetscScalar *array; 2060 2061 PetscFunctionBegin; 2062 *newmat = 0; 2063 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 2064 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2065 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2066 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2067 ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2068 ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2069 2070 mat->factortype = matin->factortype; 2071 mat->preallocated = PETSC_TRUE; 2072 mat->assembled = PETSC_TRUE; 2073 mat->insertmode = NOT_SET_VALUES; 2074 2075 a = (Mat_MPISBAIJ*)mat->data; 2076 a->bs2 = oldmat->bs2; 2077 a->mbs = oldmat->mbs; 2078 a->nbs = oldmat->nbs; 2079 a->Mbs = oldmat->Mbs; 2080 a->Nbs = oldmat->Nbs; 2081 2082 2083 a->size = oldmat->size; 2084 a->rank = oldmat->rank; 2085 a->donotstash = oldmat->donotstash; 2086 a->roworiented = oldmat->roworiented; 2087 a->rowindices = 0; 2088 a->rowvalues = 0; 2089 a->getrowactive = PETSC_FALSE; 2090 a->barray = 0; 2091 a->rstartbs = oldmat->rstartbs; 2092 a->rendbs = oldmat->rendbs; 2093 a->cstartbs = oldmat->cstartbs; 2094 
a->cendbs = oldmat->cendbs; 2095 2096 /* hash table stuff */ 2097 a->ht = 0; 2098 a->hd = 0; 2099 a->ht_size = 0; 2100 a->ht_flag = oldmat->ht_flag; 2101 a->ht_fact = oldmat->ht_fact; 2102 a->ht_total_ct = 0; 2103 a->ht_insert_ct = 0; 2104 2105 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr); 2106 if (oldmat->colmap) { 2107 #if defined (PETSC_USE_CTABLE) 2108 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2109 #else 2110 ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2111 ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2112 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2113 #endif 2114 } else a->colmap = 0; 2115 2116 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2117 ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2118 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2119 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 2120 } else a->garray = 0; 2121 2122 ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 2123 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2124 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2125 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2126 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2127 2128 ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr); 2129 ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr); 2130 ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr); 2131 ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr); 2132 2133 ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr); 2134 ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr); 2135 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr); 2136 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr); 2137 ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr); 2138 ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr); 2139 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr); 2140 ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr); 2141 ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr); 2142 ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr); 2143 ierr = PetscLogObjectParent(mat,a->slvec0b);CHKERRQ(ierr); 2144 ierr = PetscLogObjectParent(mat,a->slvec1a);CHKERRQ(ierr); 2145 ierr = PetscLogObjectParent(mat,a->slvec1b);CHKERRQ(ierr); 2146 2147 /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */ 2148 ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr); 2149 a->sMvctx = oldmat->sMvctx; 2150 ierr = PetscLogObjectParent(mat,a->sMvctx);CHKERRQ(ierr); 2151 2152 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2153 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2154 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2155 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2156 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2157 *newmat = mat; 2158 PetscFunctionReturn(0); 2159 } 2160 2161 #undef __FUNCT__ 2162 #define __FUNCT__ "MatLoad_MPISBAIJ" 2163 PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer) 2164 { 2165 PetscErrorCode ierr; 2166 PetscInt i,nz,j,rstart,rend; 
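  /* Overall flow of this loader: process 0 reads the binary header and the row lengths,
     scatters each process its share of the row lengths, then reads the column indices and
     the numerical values in per-process chunks and ships them with MPI_Send; every process
     derives its diagonal/off-diagonal block preallocation (dlens/odlens) from the block-column
     ownership, inserts its values with MatSetValues(), and the matrix is assembled.  When the
     global size is not divisible by the block size, extra rows with a unit diagonal are
     appended to pad the matrix. */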
2167 PetscScalar *vals,*buf; 2168 MPI_Comm comm = ((PetscObject)viewer)->comm; 2169 MPI_Status status; 2170 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,*locrowlens,mmbs; 2171 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2172 PetscInt *procsnz = 0,jj,*mycols,*ibuf; 2173 PetscInt bs=1,Mbs,mbs,extra_rows; 2174 PetscInt *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2175 PetscInt dcount,kmax,k,nzcount,tmp,sizesset=1,grows,gcols; 2176 int fd; 2177 2178 PetscFunctionBegin; 2179 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr); 2180 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 2181 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2182 2183 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2184 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2185 if (!rank) { 2186 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2187 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2188 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2189 if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ"); 2190 } 2191 2192 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0; 2193 2194 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2195 M = header[1]; N = header[2]; 2196 2197 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 2198 if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M; 2199 if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N; 2200 2201 /* If global sizes are set, check if they are consistent with that given in the file */ 2202 if (sizesset) { 2203 ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr); 2204 } 2205 if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 2206 if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 2207 2208 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 2209 2210 /* 2211 This code adds extra rows to make sure the number of rows is 2212 divisible by the blocksize 2213 */ 2214 Mbs = M/bs; 2215 extra_rows = bs - M + bs*(Mbs); 2216 if (extra_rows == bs) extra_rows = 0; 2217 else Mbs++; 2218 if (extra_rows &&!rank) { 2219 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 2220 } 2221 2222 /* determine ownership of all rows */ 2223 if (newmat->rmap->n < 0) { /* PETSC_DECIDE */ 2224 mbs = Mbs/size + ((Mbs % size) > rank); 2225 m = mbs*bs; 2226 } else { /* User Set */ 2227 m = newmat->rmap->n; 2228 mbs = m/bs; 2229 } 2230 ierr = PetscMalloc2(size+1,PetscMPIInt,&rowners,size+1,PetscMPIInt,&browners);CHKERRQ(ierr); 2231 mmbs = PetscMPIIntCast(mbs); 2232 ierr = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2233 rowners[0] = 0; 2234 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2235 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2236 rstart = rowners[rank]; 2237 rend = rowners[rank+1]; 2238 2239 /* distribute row lengths to all processors */ 2240 ierr = 
PetscMalloc((rend-rstart)*bs*sizeof(PetscMPIInt),&locrowlens);CHKERRQ(ierr); 2241 if (!rank) { 2242 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2243 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2244 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2245 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr); 2246 for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2247 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2248 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2249 } else { 2250 ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2251 } 2252 2253 if (!rank) { /* procs[0] */ 2254 /* calculate the number of nonzeros on each processor */ 2255 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2256 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2257 for (i=0; i<size; i++) { 2258 for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 2259 procsnz[i] += rowlengths[j]; 2260 } 2261 } 2262 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2263 2264 /* determine max buffer needed and allocate it */ 2265 maxnz = 0; 2266 for (i=0; i<size; i++) { 2267 maxnz = PetscMax(maxnz,procsnz[i]); 2268 } 2269 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2270 2271 /* read in my part of the matrix column indices */ 2272 nz = procsnz[0]; 2273 ierr = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 2274 mycols = ibuf; 2275 if (size == 1) nz -= extra_rows; 2276 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2277 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2278 2279 /* read in every ones (except the last) and ship off */ 2280 for (i=1; i<size-1; i++) { 2281 nz = procsnz[i]; 2282 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2283 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2284 } 2285 /* read in the stuff for the last proc */ 2286 if (size != 1) { 2287 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2288 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2289 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2290 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 2291 } 2292 ierr = PetscFree(cols);CHKERRQ(ierr); 2293 } else { /* procs[i], i>0 */ 2294 /* determine buffer space needed for message */ 2295 nz = 0; 2296 for (i=0; i<m; i++) { 2297 nz += locrowlens[i]; 2298 } 2299 ierr = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 2300 mycols = ibuf; 2301 /* receive message of column indices*/ 2302 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2303 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2304 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2305 } 2306 2307 /* loop over local rows, determining number of off diagonal entries */ 2308 ierr = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr); 2309 ierr = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr); 2310 ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2311 ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2312 ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2313 rowcount = 0; 2314 nzcount = 0; 2315 for (i=0; i<mbs; i++) { 2316 dcount = 0; 2317 odcount = 0; 2318 for (j=0; j<bs; j++) 
{ 2319 kmax = locrowlens[rowcount]; 2320 for (k=0; k<kmax; k++) { 2321 tmp = mycols[nzcount++]/bs; /* block col. index */ 2322 if (!mask[tmp]) { 2323 mask[tmp] = 1; 2324 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */ 2325 else masked1[dcount++] = tmp; /* entry in diag portion */ 2326 } 2327 } 2328 rowcount++; 2329 } 2330 2331 dlens[i] = dcount; /* d_nzz[i] */ 2332 odlens[i] = odcount; /* o_nzz[i] */ 2333 2334 /* zero out the mask elements we set */ 2335 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2336 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2337 } 2338 if (!sizesset) { 2339 ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 2340 } 2341 ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 2342 ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr); 2343 2344 if (!rank) { 2345 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2346 /* read in my part of the matrix numerical values */ 2347 nz = procsnz[0]; 2348 vals = buf; 2349 mycols = ibuf; 2350 if (size == 1) nz -= extra_rows; 2351 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2352 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2353 2354 /* insert into matrix */ 2355 jj = rstart*bs; 2356 for (i=0; i<m; i++) { 2357 ierr = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2358 mycols += locrowlens[i]; 2359 vals += locrowlens[i]; 2360 jj++; 2361 } 2362 2363 /* read in other processors (except the last one) and ship out */ 2364 for (i=1; i<size-1; i++) { 2365 nz = procsnz[i]; 2366 vals = buf; 2367 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2368 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 2369 } 2370 /* the last proc */ 2371 if (size != 1){ 2372 nz = procsnz[i] - extra_rows; 2373 vals = buf; 2374 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2375 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2376 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 2377 } 2378 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2379 2380 } else { 2381 /* receive numeric values */ 2382 ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2383 2384 /* receive message of values*/ 2385 vals = buf; 2386 mycols = ibuf; 2387 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr); 2388 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2389 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2390 2391 /* insert into matrix */ 2392 jj = rstart*bs; 2393 for (i=0; i<m; i++) { 2394 ierr = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2395 mycols += locrowlens[i]; 2396 vals += locrowlens[i]; 2397 jj++; 2398 } 2399 } 2400 2401 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2402 ierr = PetscFree(buf);CHKERRQ(ierr); 2403 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2404 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 2405 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 2406 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 2407 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2408 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2409 PetscFunctionReturn(0); 2410 } 2411 2412 #undef __FUNCT__ 2413 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor" 2414 /*XXXXX@ 2415 
MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2416 2417 Input Parameters: 2418 . mat - the matrix 2419 . fact - factor 2420 2421 Not Collective on Mat, each process can have a different hash factor 2422 2423 Level: advanced 2424 2425 Notes: 2426 This can also be set by the command line option: -mat_use_hash_table fact 2427 2428 .keywords: matrix, hashtable, factor, HT 2429 2430 .seealso: MatSetOption() 2431 @XXXXX*/ 2432 2433 2434 #undef __FUNCT__ 2435 #define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ" 2436 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[]) 2437 { 2438 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 2439 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(a->B)->data; 2440 PetscReal atmp; 2441 PetscReal *work,*svalues,*rvalues; 2442 PetscErrorCode ierr; 2443 PetscInt i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol; 2444 PetscMPIInt rank,size; 2445 PetscInt *rowners_bs,dest,count,source; 2446 PetscScalar *va; 2447 MatScalar *ba; 2448 MPI_Status stat; 2449 2450 PetscFunctionBegin; 2451 if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov"); 2452 ierr = MatGetRowMaxAbs(a->A,v,PETSC_NULL);CHKERRQ(ierr); 2453 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2454 2455 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 2456 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 2457 2458 bs = A->rmap->bs; 2459 mbs = a->mbs; 2460 Mbs = a->Mbs; 2461 ba = b->a; 2462 bi = b->i; 2463 bj = b->j; 2464 2465 /* find ownerships */ 2466 rowners_bs = A->rmap->range; 2467 2468 /* each proc creates an array to be distributed */ 2469 ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr); 2470 ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr); 2471 2472 /* row_max for B */ 2473 if (rank != size-1){ 2474 for (i=0; i<mbs; i++) { 2475 ncols = bi[1] - bi[0]; bi++; 2476 brow = bs*i; 2477 for (j=0; j<ncols; j++){ 2478 bcol = bs*(*bj); 2479 for (kcol=0; kcol<bs; kcol++){ 2480 col = bcol + kcol; /* local col index */ 2481 col += rowners_bs[rank+1]; /* global col index */ 2482 for (krow=0; krow<bs; krow++){ 2483 atmp = PetscAbsScalar(*ba); ba++; 2484 row = brow + krow; /* local row index */ 2485 if (PetscRealPart(va[row]) < atmp) va[row] = atmp; 2486 if (work[col] < atmp) work[col] = atmp; 2487 } 2488 } 2489 bj++; 2490 } 2491 } 2492 2493 /* send values to its owners */ 2494 for (dest=rank+1; dest<size; dest++){ 2495 svalues = work + rowners_bs[dest]; 2496 count = rowners_bs[dest+1]-rowners_bs[dest]; 2497 ierr = MPI_Send(svalues,count,MPIU_REAL,dest,rank,((PetscObject)A)->comm);CHKERRQ(ierr); 2498 } 2499 } 2500 2501 /* receive values */ 2502 if (rank){ 2503 rvalues = work; 2504 count = rowners_bs[rank+1]-rowners_bs[rank]; 2505 for (source=0; source<rank; source++){ 2506 ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,((PetscObject)A)->comm,&stat);CHKERRQ(ierr); 2507 /* process values */ 2508 for (i=0; i<count; i++){ 2509 if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i]; 2510 } 2511 } 2512 } 2513 2514 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2515 ierr = PetscFree(work);CHKERRQ(ierr); 2516 PetscFunctionReturn(0); 2517 } 2518 2519 #undef __FUNCT__ 2520 #define __FUNCT__ "MatSOR_MPISBAIJ" 2521 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2522 { 2523 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2524 PetscErrorCode ierr; 2525 PetscInt mbs=mat->mbs,bs=matin->rmap->bs; 2526 
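  /* For the local symmetric sweeps below, the off-process coupling is applied through the
     split work vectors: slvec0 is packed with [-xx; -B^T*xx] and scattered with ADD_VALUES
     into slvec1 = [bb; 0], so that afterwards slvec1a holds bb minus the lower-triangular
     off-process contribution and slvec1b holds the (negated) ghost values of xx; one further
     multiply with B then gives bb1 = bb - (off-diagonal part of the matrix)*xx, which serves
     as the right-hand side of the purely local sweep on the diagonal block mat->A. */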
PetscScalar *x,*ptr,*from; 2527 Vec bb1; 2528 const PetscScalar *b; 2529 2530 PetscFunctionBegin; 2531 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 2532 if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2533 2534 if (flag == SOR_APPLY_UPPER) { 2535 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2536 PetscFunctionReturn(0); 2537 } 2538 2539 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2540 if ( flag & SOR_ZERO_INITIAL_GUESS ) { 2541 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2542 its--; 2543 } 2544 2545 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2546 while (its--){ 2547 2548 /* lower triangular part: slvec0b = - B^T*xx */ 2549 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2550 2551 /* copy xx into slvec0a */ 2552 ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2553 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2554 ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2555 ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2556 2557 ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr); 2558 2559 /* copy bb into slvec1a */ 2560 ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2561 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 2562 ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2563 ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2564 2565 /* set slvec1b = 0 */ 2566 ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr); 2567 2568 ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2569 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2570 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 2571 ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2572 2573 /* upper triangular part: bb1 = bb1 - B*x */ 2574 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr); 2575 2576 /* local diagonal sweep */ 2577 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2578 } 2579 ierr = VecDestroy(bb1);CHKERRQ(ierr); 2580 } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)){ 2581 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2582 } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)){ 2583 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2584 } else if (flag & SOR_EISENSTAT) { 2585 Vec xx1; 2586 PetscTruth hasop; 2587 const PetscScalar *diag; 2588 PetscScalar *sl,scale = (omega - 2.0)/omega; 2589 PetscInt i,n; 2590 2591 if (!mat->xx1) { 2592 ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr); 2593 ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr); 2594 } 2595 xx1 = mat->xx1; 2596 bb1 = mat->bb1; 2597 2598 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 2599 2600 if (!mat->diag) { 2601 /* this is wrong for same matrix with new nonzero values */ 2602 ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr); 2603 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 2604 } 2605 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 2606 2607 if (hasop) { 2608 ierr = 
MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 2609 ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr); 2610 } else { 2611 /* 2612 These two lines are replaced by code that may be a bit faster for a good compiler 2613 ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr); 2614 ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr); 2615 */ 2616 ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr); 2617 ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr); 2618 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 2619 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2620 ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr); 2621 if (omega == 1.0) { 2622 for (i=0; i<n; i++) { 2623 sl[i] = b[i] - diag[i]*x[i]; 2624 } 2625 ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr); 2626 } else { 2627 for (i=0; i<n; i++) { 2628 sl[i] = b[i] + scale*diag[i]*x[i]; 2629 } 2630 ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr); 2631 } 2632 ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr); 2633 ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr); 2634 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 2635 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2636 } 2637 2638 /* multiply off-diagonal portion of matrix */ 2639 ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr); 2640 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2641 ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr); 2642 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2643 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2644 ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr); 2645 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2646 ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2647 ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2648 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr); 2649 2650 /* local sweep */ 2651 ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 2652 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 2653 } else { 2654 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2655 } 2656 PetscFunctionReturn(0); 2657 } 2658 2659 #undef __FUNCT__ 2660 #define __FUNCT__ "MatSOR_MPISBAIJ_2comm" 2661 PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2662 { 2663 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2664 PetscErrorCode ierr; 2665 Vec lvec1,bb1; 2666 2667 PetscFunctionBegin; 2668 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 2669 if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2670 2671 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2672 if ( flag & SOR_ZERO_INITIAL_GUESS ) { 2673 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2674 its--; 2675 } 2676 2677 ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr); 2678 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2679 while (its--){ 2680 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2681 2682 /* lower diagonal part: bb1 = bb - B^T*xx */ 2683 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr); 2684 ierr = 
VecScale(lvec1,-1.0);CHKERRQ(ierr); 2685 2686 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2687 ierr = VecCopy(bb,bb1);CHKERRQ(ierr); 2688 ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2689 2690 /* upper diagonal part: bb1 = bb1 - B*x */ 2691 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2692 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 2693 2694 ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2695 2696 /* diagonal sweep */ 2697 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2698 } 2699 ierr = VecDestroy(lvec1);CHKERRQ(ierr); 2700 ierr = VecDestroy(bb1);CHKERRQ(ierr); 2701 } else { 2702 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2703 } 2704 PetscFunctionReturn(0); 2705 } 2706 2707
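/*
   Usage sketch: how the routines in this file are typically driven from application code.
   The communicator, local size, block size, and preallocation counts below are arbitrary
   illustrative values, and error checking is omitted.

      Mat      A;
      PetscInt bs = 2, mlocal = 4;      two 2x2 block rows per process
      MatCreateMPISBAIJ(PETSC_COMM_WORLD,bs,mlocal,mlocal,PETSC_DETERMINE,PETSC_DETERMINE,
                        2,PETSC_NULL,1,PETSC_NULL,&A);
      ... set entries only in the upper triangle (block row <= block column), e.g. with
          MatSetValuesBlocked(); lower-triangular input is ignored ...
      MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
      MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
      ... use A (MatMult(), as a KSP operator, ...) and finally MatDestroy(A) ...
*/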