1 #define PETSCMAT_DLL 2 3 #include "../src/mat/impls/baij/mpi/mpibaij.h" /*I "petscmat.h" I*/ 4 #include "../src/mat/impls/sbaij/mpi/mpisbaij.h" 5 #include "../src/mat/impls/sbaij/seq/sbaij.h" 6 #include "petscblaslapack.h" 7 8 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat); 9 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat); 10 EXTERN PetscErrorCode DisAssemble_MPISBAIJ(Mat); 11 EXTERN PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt); 12 EXTERN PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []); 13 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []); 14 EXTERN PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode); 15 EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 16 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 17 EXTERN PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**); 18 EXTERN PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**); 19 EXTERN PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*); 20 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *); 21 EXTERN PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat,Vec,PetscInt[]); 22 EXTERN PetscErrorCode MatSOR_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec); 23 24 EXTERN_C_BEGIN 25 #undef __FUNCT__ 26 #define __FUNCT__ "MatStoreValues_MPISBAIJ" 27 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPISBAIJ(Mat mat) 28 { 29 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 30 PetscErrorCode ierr; 31 32 PetscFunctionBegin; 33 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 34 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 35 PetscFunctionReturn(0); 36 } 37 EXTERN_C_END 38 39 EXTERN_C_BEGIN 40 #undef __FUNCT__ 41 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ" 42 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPISBAIJ(Mat mat) 43 { 44 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 45 PetscErrorCode ierr; 46 47 PetscFunctionBegin; 48 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 49 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 50 PetscFunctionReturn(0); 51 } 52 EXTERN_C_END 53 54 55 #define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \ 56 { \ 57 \ 58 brow = row/bs; \ 59 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 60 rmax = aimax[brow]; nrow = ailen[brow]; \ 61 bcol = col/bs; \ 62 ridx = row % bs; cidx = col % bs; \ 63 low = 0; high = nrow; \ 64 while (high-low > 3) { \ 65 t = (low+high)/2; \ 66 if (rp[t] > bcol) high = t; \ 67 else low = t; \ 68 } \ 69 for (_i=low; _i<high; _i++) { \ 70 if (rp[_i] > bcol) break; \ 71 if (rp[_i] == bcol) { \ 72 bap = ap + bs2*_i + bs*cidx + ridx; \ 73 if (addv == ADD_VALUES) *bap += value; \ 74 else *bap = value; \ 75 goto a_noinsert; \ 76 } \ 77 } \ 78 if (a->nonew == 1) goto a_noinsert; \ 79 if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 80 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 81 N = nrow++ - 1; \ 82 /* shift up all the later entries in this row */ \ 83 for (ii=N; ii>=_i; ii--) { \ 84 rp[ii+1] = rp[ii]; \ 85 ierr = 
PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 86 } \ 87 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 88 rp[_i] = bcol; \ 89 ap[bs2*_i + bs*cidx + ridx] = value; \ 90 a_noinsert:; \ 91 ailen[brow] = nrow; \ 92 } 93 94 #define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \ 95 { \ 96 brow = row/bs; \ 97 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 98 rmax = bimax[brow]; nrow = bilen[brow]; \ 99 bcol = col/bs; \ 100 ridx = row % bs; cidx = col % bs; \ 101 low = 0; high = nrow; \ 102 while (high-low > 3) { \ 103 t = (low+high)/2; \ 104 if (rp[t] > bcol) high = t; \ 105 else low = t; \ 106 } \ 107 for (_i=low; _i<high; _i++) { \ 108 if (rp[_i] > bcol) break; \ 109 if (rp[_i] == bcol) { \ 110 bap = ap + bs2*_i + bs*cidx + ridx; \ 111 if (addv == ADD_VALUES) *bap += value; \ 112 else *bap = value; \ 113 goto b_noinsert; \ 114 } \ 115 } \ 116 if (b->nonew == 1) goto b_noinsert; \ 117 if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 118 MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 119 N = nrow++ - 1; \ 120 /* shift up all the later entries in this row */ \ 121 for (ii=N; ii>=_i; ii--) { \ 122 rp[ii+1] = rp[ii]; \ 123 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 124 } \ 125 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 126 rp[_i] = bcol; \ 127 ap[bs2*_i + bs*cidx + ridx] = value; \ 128 b_noinsert:; \ 129 bilen[brow] = nrow; \ 130 } 131 132 /* Only add/insert a(i,j) with i<=j (blocks). 133 Any a(i,j) with i>j input by user is ingored. 134 */ 135 #undef __FUNCT__ 136 #define __FUNCT__ "MatSetValues_MPISBAIJ" 137 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 138 { 139 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 140 MatScalar value; 141 PetscTruth roworiented = baij->roworiented; 142 PetscErrorCode ierr; 143 PetscInt i,j,row,col; 144 PetscInt rstart_orig=mat->rmap->rstart; 145 PetscInt rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart; 146 PetscInt cend_orig=mat->cmap->rend,bs=mat->rmap->bs; 147 148 /* Some Variables required in the macro */ 149 Mat A = baij->A; 150 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data; 151 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 152 MatScalar *aa=a->a; 153 154 Mat B = baij->B; 155 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 156 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 157 MatScalar *ba=b->a; 158 159 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 160 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 161 MatScalar *ap,*bap; 162 163 /* for stash */ 164 PetscInt n_loc, *in_loc = PETSC_NULL; 165 MatScalar *v_loc = PETSC_NULL; 166 167 PetscFunctionBegin; 168 if (v) PetscValidScalarPointer(v,6); 169 if (!baij->donotstash){ 170 if (n > baij->n_loc) { 171 ierr = PetscFree(baij->in_loc);CHKERRQ(ierr); 172 ierr = PetscFree(baij->v_loc);CHKERRQ(ierr); 173 ierr = PetscMalloc(n*sizeof(PetscInt),&baij->in_loc);CHKERRQ(ierr); 174 ierr = PetscMalloc(n*sizeof(MatScalar),&baij->v_loc);CHKERRQ(ierr); 175 baij->n_loc = n; 176 } 177 in_loc = baij->in_loc; 178 v_loc = baij->v_loc; 179 } 180 181 for (i=0; i<m; i++) { 182 if (im[i] < 0) continue; 183 #if defined(PETSC_USE_DEBUG) 184 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max 
%D",im[i],mat->rmap->N-1); 185 #endif 186 if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */ 187 row = im[i] - rstart_orig; /* local row index */ 188 for (j=0; j<n; j++) { 189 if (im[i]/bs > in[j]/bs){ 190 if (a->ignore_ltriangular){ 191 continue; /* ignore lower triangular blocks */ 192 } else { 193 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 194 } 195 } 196 if (in[j] >= cstart_orig && in[j] < cend_orig){ /* diag entry (A) */ 197 col = in[j] - cstart_orig; /* local col index */ 198 brow = row/bs; bcol = col/bs; 199 if (brow > bcol) continue; /* ignore lower triangular blocks of A */ 200 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 201 MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv); 202 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 203 } else if (in[j] < 0) continue; 204 #if defined(PETSC_USE_DEBUG) 205 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 206 #endif 207 else { /* off-diag entry (B) */ 208 if (mat->was_assembled) { 209 if (!baij->colmap) { 210 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 211 } 212 #if defined (PETSC_USE_CTABLE) 213 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 214 col = col - 1; 215 #else 216 col = baij->colmap[in[j]/bs] - 1; 217 #endif 218 if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) { 219 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 220 col = in[j]; 221 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 222 B = baij->B; 223 b = (Mat_SeqBAIJ*)(B)->data; 224 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 225 ba=b->a; 226 } else col += in[j]%bs; 227 } else col = in[j]; 228 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 229 MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv); 230 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 231 } 232 } 233 } else { /* off processor entry */ 234 if (!baij->donotstash) { 235 n_loc = 0; 236 for (j=0; j<n; j++){ 237 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 238 in_loc[n_loc] = in[j]; 239 if (roworiented) { 240 v_loc[n_loc] = v[i*n+j]; 241 } else { 242 v_loc[n_loc] = v[j*m+i]; 243 } 244 n_loc++; 245 } 246 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr); 247 } 248 } 249 } 250 PetscFunctionReturn(0); 251 } 252 253 #undef __FUNCT__ 254 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ" 255 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 256 { 257 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 258 const MatScalar *value; 259 MatScalar *barray=baij->barray; 260 PetscTruth roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular; 261 PetscErrorCode ierr; 262 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 263 PetscInt rend=baij->rendbs,cstart=baij->rstartbs,stepval; 264 PetscInt cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2; 265 266 PetscFunctionBegin; 267 if(!barray) { 268 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 269 baij->barray = barray; 270 } 271 272 if (roworiented) { 273 stepval = (n-1)*bs; 274 } else { 275 stepval = 
(m-1)*bs; 276 } 277 for (i=0; i<m; i++) { 278 if (im[i] < 0) continue; 279 #if defined(PETSC_USE_DEBUG) 280 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 281 #endif 282 if (im[i] >= rstart && im[i] < rend) { 283 row = im[i] - rstart; 284 for (j=0; j<n; j++) { 285 if (im[i] > in[j]) { 286 if (ignore_ltriangular) continue; /* ignore lower triangular blocks */ 287 else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 288 } 289 /* If NumCol = 1 then a copy is not required */ 290 if ((roworiented) && (n == 1)) { 291 barray = (MatScalar*) v + i*bs2; 292 } else if((!roworiented) && (m == 1)) { 293 barray = (MatScalar*) v + j*bs2; 294 } else { /* Here a copy is required */ 295 if (roworiented) { 296 value = v + i*(stepval+bs)*bs + j*bs; 297 } else { 298 value = v + j*(stepval+bs)*bs + i*bs; 299 } 300 for (ii=0; ii<bs; ii++,value+=stepval) { 301 for (jj=0; jj<bs; jj++) { 302 *barray++ = *value++; 303 } 304 } 305 barray -=bs2; 306 } 307 308 if (in[j] >= cstart && in[j] < cend){ 309 col = in[j] - cstart; 310 ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 311 } 312 else if (in[j] < 0) continue; 313 #if defined(PETSC_USE_DEBUG) 314 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 315 #endif 316 else { 317 if (mat->was_assembled) { 318 if (!baij->colmap) { 319 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 320 } 321 322 #if defined(PETSC_USE_DEBUG) 323 #if defined (PETSC_USE_CTABLE) 324 { PetscInt data; 325 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 326 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 327 } 328 #else 329 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 330 #endif 331 #endif 332 #if defined (PETSC_USE_CTABLE) 333 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 334 col = (col - 1)/bs; 335 #else 336 col = (baij->colmap[in[j]] - 1)/bs; 337 #endif 338 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 339 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 340 col = in[j]; 341 } 342 } 343 else col = in[j]; 344 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 345 } 346 } 347 } else { 348 if (!baij->donotstash) { 349 if (roworiented) { 350 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 351 } else { 352 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 353 } 354 } 355 } 356 } 357 PetscFunctionReturn(0); 358 } 359 360 #undef __FUNCT__ 361 #define __FUNCT__ "MatGetValues_MPISBAIJ" 362 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 363 { 364 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 365 PetscErrorCode ierr; 366 PetscInt bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend; 367 PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data; 368 369 PetscFunctionBegin; 370 for (i=0; i<m; i++) { 371 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */ 372 if (idxm[i] >= mat->rmap->N) 
SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 373 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 374 row = idxm[i] - bsrstart; 375 for (j=0; j<n; j++) { 376 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */ 377 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 378 if (idxn[j] >= bscstart && idxn[j] < bscend){ 379 col = idxn[j] - bscstart; 380 ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 381 } else { 382 if (!baij->colmap) { 383 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 384 } 385 #if defined (PETSC_USE_CTABLE) 386 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 387 data --; 388 #else 389 data = baij->colmap[idxn[j]/bs]-1; 390 #endif 391 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 392 else { 393 col = data + idxn[j]%bs; 394 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 395 } 396 } 397 } 398 } else { 399 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 400 } 401 } 402 PetscFunctionReturn(0); 403 } 404 405 #undef __FUNCT__ 406 #define __FUNCT__ "MatNorm_MPISBAIJ" 407 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm) 408 { 409 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 410 PetscErrorCode ierr; 411 PetscReal sum[2],*lnorm2; 412 413 PetscFunctionBegin; 414 if (baij->size == 1) { 415 ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr); 416 } else { 417 if (type == NORM_FROBENIUS) { 418 ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr); 419 ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr); 420 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++; /* squar power of norm(A) */ 421 ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr); 422 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--; /* squar power of norm(B) */ 423 ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 424 *norm = sqrt(sum[0] + 2*sum[1]); 425 ierr = PetscFree(lnorm2);CHKERRQ(ierr); 426 } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */ 427 Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data; 428 Mat_SeqBAIJ *bmat=(Mat_SeqBAIJ*)baij->B->data; 429 PetscReal *rsum,*rsum2,vabs; 430 PetscInt *jj,*garray=baij->garray,rstart=baij->rstartbs,nz; 431 PetscInt brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs; 432 MatScalar *v; 433 434 ierr = PetscMalloc2(mat->cmap->N,PetscReal,&rsum,mat->cmap->N,PetscReal,&rsum2);CHKERRQ(ierr); 435 ierr = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 436 /* Amat */ 437 v = amat->a; jj = amat->j; 438 for (brow=0; brow<mbs; brow++) { 439 grow = bs*(rstart + brow); 440 nz = amat->i[brow+1] - amat->i[brow]; 441 for (bcol=0; bcol<nz; bcol++){ 442 gcol = bs*(rstart + *jj); jj++; 443 for (col=0; col<bs; col++){ 444 for (row=0; row<bs; row++){ 445 vabs = PetscAbsScalar(*v); v++; 446 rsum[gcol+col] += vabs; 447 /* non-diagonal block */ 448 if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs; 449 } 450 } 451 } 452 } 453 /* Bmat */ 454 v = bmat->a; jj = bmat->j; 455 for (brow=0; brow<mbs; brow++) { 456 grow = bs*(rstart + brow); 457 nz = bmat->i[brow+1] - bmat->i[brow]; 458 for (bcol=0; bcol<nz; bcol++){ 459 gcol = bs*garray[*jj]; jj++; 460 for (col=0; col<bs; col++){ 461 for (row=0; row<bs; row++){ 462 vabs = PetscAbsScalar(*v); v++; 463 rsum[gcol+col] += 
vabs; 464 rsum[grow+row] += vabs; 465 } 466 } 467 } 468 } 469 ierr = MPI_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 470 *norm = 0.0; 471 for (col=0; col<mat->cmap->N; col++) { 472 if (rsum2[col] > *norm) *norm = rsum2[col]; 473 } 474 ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr); 475 } else { 476 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet"); 477 } 478 } 479 PetscFunctionReturn(0); 480 } 481 482 #undef __FUNCT__ 483 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ" 484 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode) 485 { 486 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 487 PetscErrorCode ierr; 488 PetscInt nstash,reallocs; 489 InsertMode addv; 490 491 PetscFunctionBegin; 492 if (baij->donotstash) { 493 PetscFunctionReturn(0); 494 } 495 496 /* make sure all processors are either in INSERTMODE or ADDMODE */ 497 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr); 498 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 499 mat->insertmode = addv; /* in case this processor had no cache */ 500 501 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 502 ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr); 503 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 504 ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 505 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 506 ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 507 PetscFunctionReturn(0); 508 } 509 510 #undef __FUNCT__ 511 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ" 512 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode) 513 { 514 Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data; 515 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)baij->A->data; 516 PetscErrorCode ierr; 517 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 518 PetscInt *row,*col; 519 PetscTruth other_disassembled; 520 PetscMPIInt n; 521 PetscTruth r1,r2,r3; 522 MatScalar *val; 523 InsertMode addv = mat->insertmode; 524 525 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 526 PetscFunctionBegin; 527 528 if (!baij->donotstash) { 529 while (1) { 530 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 531 if (!flg) break; 532 533 for (i=0; i<n;) { 534 /* Now identify the consecutive vals belonging to the same row */ 535 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 536 if (j < n) ncols = j-i; 537 else ncols = n-i; 538 /* Now assemble all these values with a single function call */ 539 ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 540 i = j; 541 } 542 } 543 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 544 /* Now process the block-stash. 
          Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
  }
  ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  baij->rowvalues = 0;

  PetscFunctionReturn(0);
}

extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);

#undef __FUNCT__
#define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscInt          bs = mat->rmap->bs;
  PetscMPIInt       size = baij->size,rank = baij->rank;
  PetscTruth        iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
                                                rank,mat->rmap->N,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr); 626 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 627 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr); 628 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 629 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr); 630 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 631 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 632 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 633 PetscFunctionReturn(0); 634 } else if (format == PETSC_VIEWER_ASCII_INFO) { 635 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 636 PetscFunctionReturn(0); 637 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 638 PetscFunctionReturn(0); 639 } 640 } 641 642 if (isdraw) { 643 PetscDraw draw; 644 PetscTruth isnull; 645 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 646 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 647 } 648 649 if (size == 1) { 650 ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 651 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 652 } else { 653 /* assemble the entire matrix onto first processor. */ 654 Mat A; 655 Mat_SeqSBAIJ *Aloc; 656 Mat_SeqBAIJ *Bloc; 657 PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 658 MatScalar *a; 659 660 /* Should this be the same type as mat? */ 661 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 662 if (!rank) { 663 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 664 } else { 665 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 666 } 667 ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 668 ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 669 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 670 671 /* copy over the A part */ 672 Aloc = (Mat_SeqSBAIJ*)baij->A->data; 673 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 674 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 675 676 for (i=0; i<mbs; i++) { 677 rvals[0] = bs*(baij->rstartbs + i); 678 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 679 for (j=ai[i]; j<ai[i+1]; j++) { 680 col = (baij->cstartbs+aj[j])*bs; 681 for (k=0; k<bs; k++) { 682 ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 683 col++; a += bs; 684 } 685 } 686 } 687 /* copy over the B part */ 688 Bloc = (Mat_SeqBAIJ*)baij->B->data; 689 ai = Bloc->i; aj = Bloc->j; a = Bloc->a; 690 for (i=0; i<mbs; i++) { 691 692 rvals[0] = bs*(baij->rstartbs + i); 693 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 694 for (j=ai[i]; j<ai[i+1]; j++) { 695 col = baij->garray[aj[j]]*bs; 696 for (k=0; k<bs; k++) { 697 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 698 col++; a += bs; 699 } 700 } 701 } 702 ierr = PetscFree(rvals);CHKERRQ(ierr); 703 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 704 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 705 /* 706 Everyone has to call to draw the matrix since the graphics waits are 707 synchronized across all processors that share the PetscDraw object 708 */ 709 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 710 if (!rank) { 711 ierr = 
PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 712 ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 713 } 714 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 715 ierr = MatDestroy(A);CHKERRQ(ierr); 716 } 717 PetscFunctionReturn(0); 718 } 719 720 #undef __FUNCT__ 721 #define __FUNCT__ "MatView_MPISBAIJ" 722 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer) 723 { 724 PetscErrorCode ierr; 725 PetscTruth iascii,isdraw,issocket,isbinary; 726 727 PetscFunctionBegin; 728 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 729 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 730 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 731 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 732 if (iascii || isdraw || issocket || isbinary) { 733 ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 734 } else { 735 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name); 736 } 737 PetscFunctionReturn(0); 738 } 739 740 #undef __FUNCT__ 741 #define __FUNCT__ "MatDestroy_MPISBAIJ" 742 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat) 743 { 744 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 745 PetscErrorCode ierr; 746 747 PetscFunctionBegin; 748 #if defined(PETSC_USE_LOG) 749 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N); 750 #endif 751 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 752 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 753 ierr = MatDestroy(baij->A);CHKERRQ(ierr); 754 ierr = MatDestroy(baij->B);CHKERRQ(ierr); 755 #if defined (PETSC_USE_CTABLE) 756 if (baij->colmap) {ierr = PetscTableDestroy(baij->colmap);CHKERRQ(ierr);} 757 #else 758 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 759 #endif 760 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 761 if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 762 if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 763 if (baij->slvec0) { 764 ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr); 765 ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr); 766 } 767 if (baij->slvec1) { 768 ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr); 769 ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr); 770 ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr); 771 } 772 if (baij->sMvctx) {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);} 773 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 774 ierr = PetscFree(baij->barray);CHKERRQ(ierr); 775 ierr = PetscFree(baij->hd);CHKERRQ(ierr); 776 if (baij->diag) {ierr = VecDestroy(baij->diag);CHKERRQ(ierr);} 777 if (baij->bb1) {ierr = VecDestroy(baij->bb1);CHKERRQ(ierr);} 778 if (baij->xx1) {ierr = VecDestroy(baij->xx1);CHKERRQ(ierr);} 779 #if defined(PETSC_USE_SCALAR_MAT_SINGLE) 780 ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr); 781 #endif 782 ierr = PetscFree(baij->in_loc);CHKERRQ(ierr); 783 ierr = PetscFree(baij->v_loc);CHKERRQ(ierr); 784 ierr = PetscFree(baij->rangebs);CHKERRQ(ierr); 785 ierr = PetscFree(baij);CHKERRQ(ierr); 786 787 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 788 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 789 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 790 ierr 
= PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 791 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 792 PetscFunctionReturn(0); 793 } 794 795 #undef __FUNCT__ 796 #define __FUNCT__ "MatMult_MPISBAIJ_Hermitian" 797 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy) 798 { 799 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 800 PetscErrorCode ierr; 801 PetscInt nt,mbs=a->mbs,bs=A->rmap->bs; 802 PetscScalar *x,*from; 803 804 PetscFunctionBegin; 805 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 806 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 807 808 /* diagonal part */ 809 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 810 ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr); 811 812 /* subdiagonal part */ 813 ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 814 815 /* copy x into the vec slvec0 */ 816 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 817 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 818 819 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 820 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 821 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 822 823 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 824 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 825 /* supperdiagonal part */ 826 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 827 PetscFunctionReturn(0); 828 } 829 830 #undef __FUNCT__ 831 #define __FUNCT__ "MatMult_MPISBAIJ" 832 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy) 833 { 834 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 835 PetscErrorCode ierr; 836 PetscInt nt,mbs=a->mbs,bs=A->rmap->bs; 837 PetscScalar *x,*from; 838 839 PetscFunctionBegin; 840 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 841 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 842 843 /* diagonal part */ 844 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 845 ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr); 846 847 /* subdiagonal part */ 848 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 849 850 /* copy x into the vec slvec0 */ 851 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 852 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 853 854 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 855 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 856 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 857 858 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 859 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 860 /* supperdiagonal part */ 861 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 862 PetscFunctionReturn(0); 863 } 864 865 #undef __FUNCT__ 866 #define __FUNCT__ "MatMult_MPISBAIJ_2comm" 867 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy) 868 { 869 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 870 PetscErrorCode ierr; 871 PetscInt nt; 872 873 PetscFunctionBegin; 874 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 875 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 876 877 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 878 if (nt != A->rmap->N) 
SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 879 880 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 881 /* do diagonal part */ 882 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 883 /* do supperdiagonal part */ 884 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 885 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 886 /* do subdiagonal part */ 887 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 888 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 889 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 890 891 PetscFunctionReturn(0); 892 } 893 894 #undef __FUNCT__ 895 #define __FUNCT__ "MatMultAdd_MPISBAIJ" 896 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 897 { 898 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 899 PetscErrorCode ierr; 900 PetscInt mbs=a->mbs,bs=A->rmap->bs; 901 PetscScalar *x,*from,zero=0.0; 902 903 PetscFunctionBegin; 904 /* 905 PetscSynchronizedPrintf(((PetscObject)A)->comm," MatMultAdd is called ...\n"); 906 PetscSynchronizedFlush(((PetscObject)A)->comm); 907 */ 908 /* diagonal part */ 909 ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr); 910 ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr); 911 912 /* subdiagonal part */ 913 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 914 915 /* copy x into the vec slvec0 */ 916 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 917 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 918 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 919 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 920 921 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 922 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 923 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 924 925 /* supperdiagonal part */ 926 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr); 927 928 PetscFunctionReturn(0); 929 } 930 931 #undef __FUNCT__ 932 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm" 933 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz) 934 { 935 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 936 PetscErrorCode ierr; 937 938 PetscFunctionBegin; 939 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 940 /* do diagonal part */ 941 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 942 /* do supperdiagonal part */ 943 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 944 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 945 946 /* do subdiagonal part */ 947 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 948 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 949 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 950 951 PetscFunctionReturn(0); 952 } 953 954 /* 955 This only works correctly for square matrices where the subblock A->A is the 956 diagonal block 957 */ 958 #undef __FUNCT__ 959 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ" 960 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v) 961 { 962 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 963 PetscErrorCode ierr; 964 965 PetscFunctionBegin; 966 /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square 
matrix where A->A is diag block"); */ 967 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 968 PetscFunctionReturn(0); 969 } 970 971 #undef __FUNCT__ 972 #define __FUNCT__ "MatScale_MPISBAIJ" 973 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa) 974 { 975 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 976 PetscErrorCode ierr; 977 978 PetscFunctionBegin; 979 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 980 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 981 PetscFunctionReturn(0); 982 } 983 984 #undef __FUNCT__ 985 #define __FUNCT__ "MatGetRow_MPISBAIJ" 986 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 987 { 988 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 989 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 990 PetscErrorCode ierr; 991 PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 992 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend; 993 PetscInt *cmap,*idx_p,cstart = mat->rstartbs; 994 995 PetscFunctionBegin; 996 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 997 mat->getrowactive = PETSC_TRUE; 998 999 if (!mat->rowvalues && (idx || v)) { 1000 /* 1001 allocate enough space to hold information from the longest row. 1002 */ 1003 Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data; 1004 Mat_SeqBAIJ *Ba = (Mat_SeqBAIJ*)mat->B->data; 1005 PetscInt max = 1,mbs = mat->mbs,tmp; 1006 for (i=0; i<mbs; i++) { 1007 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */ 1008 if (max < tmp) { max = tmp; } 1009 } 1010 ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1011 } 1012 1013 if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows"); 1014 lrow = row - brstart; /* local row index */ 1015 1016 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1017 if (!v) {pvA = 0; pvB = 0;} 1018 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1019 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1020 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1021 nztot = nzA + nzB; 1022 1023 cmap = mat->garray; 1024 if (v || idx) { 1025 if (nztot) { 1026 /* Sort by increasing column numbers, assuming A and B already sorted */ 1027 PetscInt imark = -1; 1028 if (v) { 1029 *v = v_p = mat->rowvalues; 1030 for (i=0; i<nzB; i++) { 1031 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1032 else break; 1033 } 1034 imark = i; 1035 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1036 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1037 } 1038 if (idx) { 1039 *idx = idx_p = mat->rowindices; 1040 if (imark > -1) { 1041 for (i=0; i<imark; i++) { 1042 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1043 } 1044 } else { 1045 for (i=0; i<nzB; i++) { 1046 if (cmap[cworkB[i]/bs] < cstart) 1047 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1048 else break; 1049 } 1050 imark = i; 1051 } 1052 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1053 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1054 } 1055 } else { 1056 if (idx) *idx = 0; 1057 if (v) *v = 0; 1058 } 1059 } 1060 *nz = nztot; 1061 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1062 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1063 PetscFunctionReturn(0); 1064 } 1065 1066 #undef __FUNCT__ 1067 #define __FUNCT__ "MatRestoreRow_MPISBAIJ" 1068 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat 
mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1069 { 1070 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1071 1072 PetscFunctionBegin; 1073 if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1074 baij->getrowactive = PETSC_FALSE; 1075 PetscFunctionReturn(0); 1076 } 1077 1078 #undef __FUNCT__ 1079 #define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ" 1080 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A) 1081 { 1082 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1083 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1084 1085 PetscFunctionBegin; 1086 aA->getrow_utriangular = PETSC_TRUE; 1087 PetscFunctionReturn(0); 1088 } 1089 #undef __FUNCT__ 1090 #define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ" 1091 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A) 1092 { 1093 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1094 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1095 1096 PetscFunctionBegin; 1097 aA->getrow_utriangular = PETSC_FALSE; 1098 PetscFunctionReturn(0); 1099 } 1100 1101 #undef __FUNCT__ 1102 #define __FUNCT__ "MatRealPart_MPISBAIJ" 1103 PetscErrorCode MatRealPart_MPISBAIJ(Mat A) 1104 { 1105 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1106 PetscErrorCode ierr; 1107 1108 PetscFunctionBegin; 1109 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1110 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1111 PetscFunctionReturn(0); 1112 } 1113 1114 #undef __FUNCT__ 1115 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ" 1116 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A) 1117 { 1118 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1119 PetscErrorCode ierr; 1120 1121 PetscFunctionBegin; 1122 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1123 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1124 PetscFunctionReturn(0); 1125 } 1126 1127 #undef __FUNCT__ 1128 #define __FUNCT__ "MatZeroEntries_MPISBAIJ" 1129 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A) 1130 { 1131 Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data; 1132 PetscErrorCode ierr; 1133 1134 PetscFunctionBegin; 1135 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1136 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1137 PetscFunctionReturn(0); 1138 } 1139 1140 #undef __FUNCT__ 1141 #define __FUNCT__ "MatGetInfo_MPISBAIJ" 1142 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1143 { 1144 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data; 1145 Mat A = a->A,B = a->B; 1146 PetscErrorCode ierr; 1147 PetscReal isend[5],irecv[5]; 1148 1149 PetscFunctionBegin; 1150 info->block_size = (PetscReal)matin->rmap->bs; 1151 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1152 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1153 isend[3] = info->memory; isend[4] = info->mallocs; 1154 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1155 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1156 isend[3] += info->memory; isend[4] += info->mallocs; 1157 if (flag == MAT_LOCAL) { 1158 info->nz_used = isend[0]; 1159 info->nz_allocated = isend[1]; 1160 info->nz_unneeded = isend[2]; 1161 info->memory = isend[3]; 1162 info->mallocs = isend[4]; 1163 } else if (flag == MAT_GLOBAL_MAX) { 1164 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1165 info->nz_used = irecv[0]; 1166 info->nz_allocated = irecv[1]; 1167 info->nz_unneeded = irecv[2]; 1168 info->memory = irecv[3]; 1169 info->mallocs = irecv[4]; 1170 } else if (flag == MAT_GLOBAL_SUM) { 1171 ierr = 
MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else {
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPISBAIJ"
PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscTruth flg)
{
  Mat_MPISBAIJ   *a  = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = flg;
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    a->ht_flag = flg;
    break;
  case MAT_HERMITIAN:
    if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first");
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    A->ops->mult = MatMult_MPISBAIJ_Hermitian;
    break;
  case MAT_SYMMETRIC:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_STRUCTURALLY_SYMMETRIC:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_SYMMETRY_ETERNAL:
    if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_LOWER_TRIANGULAR:
    aA->ignore_ltriangular = flg;
    break;
  case MAT_ERROR_LOWER_TRIANGULAR:
    aA->ignore_ltriangular = flg;
    break;
  case MAT_GETROW_UPPERTRIANGULAR:
    aA->getrow_utriangular = flg;
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPISBAIJ"
PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX || *B != A) {
    ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  Mat            a = baij->A,b = baij->B;
  PetscErrorCode ierr;
  PetscInt       nv,m,n;
  PetscTruth     flg;

  PetscFunctionBegin;
  if (ll != rr) {
    ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
    if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be the same\n");
  }
  if (!ll)
PetscFunctionReturn(0); 1276 1277 ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr); 1278 if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n); 1279 1280 ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr); 1281 if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size"); 1282 1283 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1284 1285 /* left diagonalscale the off-diagonal part */ 1286 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1287 1288 /* scale the diagonal part */ 1289 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1290 1291 /* right diagonalscale the off-diagonal part */ 1292 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1293 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1294 PetscFunctionReturn(0); 1295 } 1296 1297 #undef __FUNCT__ 1298 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ" 1299 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A) 1300 { 1301 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1302 PetscErrorCode ierr; 1303 1304 PetscFunctionBegin; 1305 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1306 PetscFunctionReturn(0); 1307 } 1308 1309 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *); 1310 1311 #undef __FUNCT__ 1312 #define __FUNCT__ "MatEqual_MPISBAIJ" 1313 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag) 1314 { 1315 Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data; 1316 Mat a,b,c,d; 1317 PetscTruth flg; 1318 PetscErrorCode ierr; 1319 1320 PetscFunctionBegin; 1321 a = matA->A; b = matA->B; 1322 c = matB->A; d = matB->B; 1323 1324 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1325 if (flg) { 1326 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1327 } 1328 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1329 PetscFunctionReturn(0); 1330 } 1331 1332 #undef __FUNCT__ 1333 #define __FUNCT__ "MatCopy_MPISBAIJ" 1334 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str) 1335 { 1336 PetscErrorCode ierr; 1337 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1338 Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data; 1339 1340 PetscFunctionBegin; 1341 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
*/ 1342 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1343 ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr); 1344 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1345 ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr); 1346 } else { 1347 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1348 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1349 } 1350 PetscFunctionReturn(0); 1351 } 1352 1353 #undef __FUNCT__ 1354 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ" 1355 PetscErrorCode MatSetUpPreallocation_MPISBAIJ(Mat A) 1356 { 1357 PetscErrorCode ierr; 1358 1359 PetscFunctionBegin; 1360 ierr = MatMPISBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1361 PetscFunctionReturn(0); 1362 } 1363 1364 #undef __FUNCT__ 1365 #define __FUNCT__ "MatAXPY_MPISBAIJ" 1366 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1367 { 1368 PetscErrorCode ierr; 1369 Mat_MPISBAIJ *xx=(Mat_MPISBAIJ *)X->data,*yy=(Mat_MPISBAIJ *)Y->data; 1370 PetscBLASInt bnz,one=1; 1371 Mat_SeqSBAIJ *xa,*ya; 1372 Mat_SeqBAIJ *xb,*yb; 1373 1374 PetscFunctionBegin; 1375 if (str == SAME_NONZERO_PATTERN) { 1376 PetscScalar alpha = a; 1377 xa = (Mat_SeqSBAIJ *)xx->A->data; 1378 ya = (Mat_SeqSBAIJ *)yy->A->data; 1379 bnz = PetscBLASIntCast(xa->nz); 1380 BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one); 1381 xb = (Mat_SeqBAIJ *)xx->B->data; 1382 yb = (Mat_SeqBAIJ *)yy->B->data; 1383 bnz = PetscBLASIntCast(xb->nz); 1384 BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one); 1385 } else { 1386 ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr); 1387 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1388 ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr); 1389 } 1390 PetscFunctionReturn(0); 1391 } 1392 1393 #undef __FUNCT__ 1394 #define __FUNCT__ "MatSetBlockSize_MPISBAIJ" 1395 PetscErrorCode MatSetBlockSize_MPISBAIJ(Mat A,PetscInt bs) 1396 { 1397 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1398 PetscInt rbs,cbs; 1399 PetscErrorCode ierr; 1400 1401 PetscFunctionBegin; 1402 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1403 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1404 ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr); 1405 ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr); 1406 if (rbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with SBAIJ %d",bs,rbs); 1407 if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with SBAIJ %d",bs,cbs); 1408 PetscFunctionReturn(0); 1409 } 1410 1411 #undef __FUNCT__ 1412 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ" 1413 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[]) 1414 { 1415 PetscErrorCode ierr; 1416 PetscInt i; 1417 PetscTruth flg; 1418 1419 PetscFunctionBegin; 1420 for (i=0; i<n; i++) { 1421 ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr); 1422 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices"); 1423 } 1424 ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr); 1425 PetscFunctionReturn(0); 1426 } 1427 1428 1429 /* -------------------------------------------------------------------*/ 1430 static struct _MatOps MatOps_Values = { 1431 MatSetValues_MPISBAIJ, 1432 MatGetRow_MPISBAIJ, 1433 MatRestoreRow_MPISBAIJ, 1434 MatMult_MPISBAIJ, 1435 /* 4*/ MatMultAdd_MPISBAIJ, 1436 MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */ 1437 MatMultAdd_MPISBAIJ, 1438 0, 1439 0, 1440 0, 1441 /*10*/ 0, 1442 
0, 1443 0, 1444 MatSOR_MPISBAIJ, 1445 MatTranspose_MPISBAIJ, 1446 /*15*/ MatGetInfo_MPISBAIJ, 1447 MatEqual_MPISBAIJ, 1448 MatGetDiagonal_MPISBAIJ, 1449 MatDiagonalScale_MPISBAIJ, 1450 MatNorm_MPISBAIJ, 1451 /*20*/ MatAssemblyBegin_MPISBAIJ, 1452 MatAssemblyEnd_MPISBAIJ, 1453 MatSetOption_MPISBAIJ, 1454 MatZeroEntries_MPISBAIJ, 1455 /*24*/ 0, 1456 0, 1457 0, 1458 0, 1459 0, 1460 /*29*/ MatSetUpPreallocation_MPISBAIJ, 1461 0, 1462 0, 1463 0, 1464 0, 1465 /*34*/ MatDuplicate_MPISBAIJ, 1466 0, 1467 0, 1468 0, 1469 0, 1470 /*39*/ MatAXPY_MPISBAIJ, 1471 MatGetSubMatrices_MPISBAIJ, 1472 MatIncreaseOverlap_MPISBAIJ, 1473 MatGetValues_MPISBAIJ, 1474 MatCopy_MPISBAIJ, 1475 /*44*/ 0, 1476 MatScale_MPISBAIJ, 1477 0, 1478 0, 1479 0, 1480 /*49*/ MatSetBlockSize_MPISBAIJ, 1481 0, 1482 0, 1483 0, 1484 0, 1485 /*54*/ 0, 1486 0, 1487 MatSetUnfactored_MPISBAIJ, 1488 0, 1489 MatSetValuesBlocked_MPISBAIJ, 1490 /*59*/ 0, 1491 0, 1492 0, 1493 0, 1494 0, 1495 /*64*/ 0, 1496 0, 1497 0, 1498 0, 1499 0, 1500 /*69*/ MatGetRowMaxAbs_MPISBAIJ, 1501 0, 1502 0, 1503 0, 1504 0, 1505 /*74*/ 0, 1506 0, 1507 0, 1508 0, 1509 0, 1510 /*79*/ 0, 1511 0, 1512 0, 1513 0, 1514 MatLoad_MPISBAIJ, 1515 /*84*/ 0, 1516 0, 1517 0, 1518 0, 1519 0, 1520 /*89*/ 0, 1521 0, 1522 0, 1523 0, 1524 0, 1525 /*94*/ 0, 1526 0, 1527 0, 1528 0, 1529 0, 1530 /*99*/ 0, 1531 0, 1532 0, 1533 0, 1534 0, 1535 /*104*/0, 1536 MatRealPart_MPISBAIJ, 1537 MatImaginaryPart_MPISBAIJ, 1538 MatGetRowUpperTriangular_MPISBAIJ, 1539 MatRestoreRowUpperTriangular_MPISBAIJ, 1540 /*109*/0, 1541 0, 1542 0, 1543 0, 1544 0, 1545 /*114*/0, 1546 0, 1547 0, 1548 0, 1549 0, 1550 /*119*/0, 1551 0, 1552 0, 1553 0 1554 }; 1555 1556 1557 EXTERN_C_BEGIN 1558 #undef __FUNCT__ 1559 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ" 1560 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 1561 { 1562 PetscFunctionBegin; 1563 *a = ((Mat_MPISBAIJ *)A->data)->A; 1564 *iscopy = PETSC_FALSE; 1565 PetscFunctionReturn(0); 1566 } 1567 EXTERN_C_END 1568 1569 EXTERN_C_BEGIN 1570 #undef __FUNCT__ 1571 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ" 1572 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz) 1573 { 1574 Mat_MPISBAIJ *b; 1575 PetscErrorCode ierr; 1576 PetscInt i,mbs,Mbs,newbs = PetscAbs(bs); 1577 1578 PetscFunctionBegin; 1579 if (bs < 0){ 1580 ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPISBAIJ matrix","Mat");CHKERRQ(ierr); 1581 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr); 1582 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1583 bs = PetscAbs(bs); 1584 } 1585 if ((d_nnz || o_nnz) && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz"); 1586 bs = newbs; 1587 1588 if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3; 1589 if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1; 1590 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 1591 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 1592 1593 B->rmap->bs = B->cmap->bs = bs; 1594 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 1595 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 1596 1597 if (d_nnz) { 1598 for (i=0; 
i<B->rmap->n/bs; i++) { 1599 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 1600 } 1601 } 1602 if (o_nnz) { 1603 for (i=0; i<B->rmap->n/bs; i++) { 1604 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 1605 } 1606 } 1607 1608 b = (Mat_MPISBAIJ*)B->data; 1609 mbs = B->rmap->n/bs; 1610 Mbs = B->rmap->N/bs; 1611 if (mbs*bs != B->rmap->n) { 1612 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number of local rows %D must be divisible by blocksize %D",B->rmap->n,bs); 1613 } 1614 1615 B->rmap->bs = bs; 1616 b->bs2 = bs*bs; 1617 b->mbs = mbs; 1618 b->nbs = mbs; 1619 b->Mbs = Mbs; 1620 b->Nbs = Mbs; 1621 1622 for (i=0; i<=b->size; i++) { 1623 b->rangebs[i] = B->rmap->range[i]/bs; 1624 } 1625 b->rstartbs = B->rmap->rstart/bs; 1626 b->rendbs = B->rmap->rend/bs; 1627 1628 b->cstartbs = B->cmap->rstart/bs; 1629 b->cendbs = B->cmap->rend/bs; 1630 1631 if (!B->preallocated) { 1632 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 1633 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 1634 ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr); 1635 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 1636 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 1637 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 1638 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 1639 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 1640 ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr); 1641 } 1642 1643 ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 1644 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 1645 B->preallocated = PETSC_TRUE; 1646 PetscFunctionReturn(0); 1647 } 1648 EXTERN_C_END 1649 1650 EXTERN_C_BEGIN 1651 #if defined(PETSC_HAVE_MUMPS) 1652 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_sbaij_mumps(Mat,MatFactorType,Mat*); 1653 #endif 1654 #if defined(PETSC_HAVE_SPOOLES) 1655 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_mpisbaij_spooles(Mat,MatFactorType,Mat*); 1656 #endif 1657 #if defined(PETSC_HAVE_PASTIX) 1658 extern PetscErrorCode MatGetFactor_mpisbaij_pastix(Mat,MatFactorType,Mat*); 1659 #endif 1660 EXTERN_C_END 1661 1662 /*MC 1663 MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices, 1664 based on block compressed sparse row format. Only the upper triangular portion of the matrix is stored. 1665 1666 Options Database Keys: 1667 . 
-mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions() 1668 1669 Level: beginner 1670 1671 .seealso: MatCreateMPISBAIJ 1672 M*/ 1673 1674 EXTERN_C_BEGIN 1675 #undef __FUNCT__ 1676 #define __FUNCT__ "MatCreate_MPISBAIJ" 1677 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPISBAIJ(Mat B) 1678 { 1679 Mat_MPISBAIJ *b; 1680 PetscErrorCode ierr; 1681 PetscTruth flg; 1682 1683 PetscFunctionBegin; 1684 1685 ierr = PetscNewLog(B,Mat_MPISBAIJ,&b);CHKERRQ(ierr); 1686 B->data = (void*)b; 1687 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1688 1689 B->ops->destroy = MatDestroy_MPISBAIJ; 1690 B->ops->view = MatView_MPISBAIJ; 1691 B->mapping = 0; 1692 B->assembled = PETSC_FALSE; 1693 1694 B->insertmode = NOT_SET_VALUES; 1695 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 1696 ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr); 1697 1698 /* build local table of row and column ownerships */ 1699 ierr = PetscMalloc((b->size+2)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr); 1700 1701 /* build cache for off array entries formed */ 1702 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 1703 b->donotstash = PETSC_FALSE; 1704 b->colmap = PETSC_NULL; 1705 b->garray = PETSC_NULL; 1706 b->roworiented = PETSC_TRUE; 1707 1708 /* stuff used in block assembly */ 1709 b->barray = 0; 1710 1711 /* stuff used for matrix vector multiply */ 1712 b->lvec = 0; 1713 b->Mvctx = 0; 1714 b->slvec0 = 0; 1715 b->slvec0b = 0; 1716 b->slvec1 = 0; 1717 b->slvec1a = 0; 1718 b->slvec1b = 0; 1719 b->sMvctx = 0; 1720 1721 /* stuff for MatGetRow() */ 1722 b->rowindices = 0; 1723 b->rowvalues = 0; 1724 b->getrowactive = PETSC_FALSE; 1725 1726 /* hash table stuff */ 1727 b->ht = 0; 1728 b->hd = 0; 1729 b->ht_size = 0; 1730 b->ht_flag = PETSC_FALSE; 1731 b->ht_fact = 0; 1732 b->ht_total_ct = 0; 1733 b->ht_insert_ct = 0; 1734 1735 b->in_loc = 0; 1736 b->v_loc = 0; 1737 b->n_loc = 0; 1738 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr); 1739 ierr = PetscOptionsTruth("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 1740 if (flg) { 1741 PetscReal fact = 1.39; 1742 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 1743 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr); 1744 if (fact <= 1.0) fact = 1.39; 1745 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 1746 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 1747 } 1748 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1749 1750 #if defined(PETSC_HAVE_PASTIX) 1751 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C", 1752 "MatGetFactor_mpisbaij_pastix", 1753 MatGetFactor_mpisbaij_pastix);CHKERRQ(ierr); 1754 #endif 1755 #if defined(PETSC_HAVE_MUMPS) 1756 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", 1757 "MatGetFactor_sbaij_mumps", 1758 MatGetFactor_sbaij_mumps);CHKERRQ(ierr); 1759 #endif 1760 #if defined(PETSC_HAVE_SPOOLES) 1761 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C", 1762 "MatGetFactor_mpisbaij_spooles", 1763 MatGetFactor_mpisbaij_spooles);CHKERRQ(ierr); 1764 #endif 1765 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1766 "MatStoreValues_MPISBAIJ", 
1767 MatStoreValues_MPISBAIJ);CHKERRQ(ierr); 1768 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1769 "MatRetrieveValues_MPISBAIJ", 1770 MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr); 1771 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 1772 "MatGetDiagonalBlock_MPISBAIJ", 1773 MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr); 1774 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C", 1775 "MatMPISBAIJSetPreallocation_MPISBAIJ", 1776 MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr); 1777 B->symmetric = PETSC_TRUE; 1778 B->structurally_symmetric = PETSC_TRUE; 1779 B->symmetric_set = PETSC_TRUE; 1780 B->structurally_symmetric_set = PETSC_TRUE; 1781 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr); 1782 PetscFunctionReturn(0); 1783 } 1784 EXTERN_C_END 1785 1786 /*MC 1787 MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices. 1788 1789 This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator, 1790 and MATMPISBAIJ otherwise. 1791 1792 Options Database Keys: 1793 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions() 1794 1795 Level: beginner 1796 1797 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ 1798 M*/ 1799 1800 EXTERN_C_BEGIN 1801 #undef __FUNCT__ 1802 #define __FUNCT__ "MatCreate_SBAIJ" 1803 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SBAIJ(Mat A) 1804 { 1805 PetscErrorCode ierr; 1806 PetscMPIInt size; 1807 1808 PetscFunctionBegin; 1809 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 1810 if (size == 1) { 1811 ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr); 1812 } else { 1813 ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 1814 } 1815 PetscFunctionReturn(0); 1816 } 1817 EXTERN_C_END 1818 1819 #undef __FUNCT__ 1820 #define __FUNCT__ "MatMPISBAIJSetPreallocation" 1821 /*@C 1822 MatMPISBAIJSetPreallocation - For good matrix assembly performance 1823 the user should preallocate the matrix storage by setting the parameters 1824 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1825 performance can be increased by more than a factor of 50. 1826 1827 Collective on Mat 1828 1829 Input Parameters: 1830 + A - the matrix 1831 . bs - size of block 1832 . d_nz - number of block nonzeros per block row in diagonal portion of local 1833 submatrix (same for all local rows) 1834 . d_nnz - array containing the number of block nonzeros in the various block rows 1835 in the upper triangular and diagonal part of the diagonal portion of the local 1836 submatrix (possibly different for each block row) or PETSC_NULL. You must leave room 1837 for the diagonal entry even if it is zero. 1838 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1839 submatrix (same for all local rows). 1840 - o_nnz - array containing the number of block nonzeros in the various block rows of the 1841 off-diagonal portion of the local submatrix (possibly different for 1842 each block row) or PETSC_NULL. 1843 1844 1845 Options Database Keys: 1846 . -mat_no_unroll - uses code that does not unroll the loops in the 1847 block calculations (much slower) 1848 . -mat_block_size - size of the blocks to use 1849 1850 Notes: 1851 1852 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1853 then it must be used on all processors that share the object for that argument. 
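   As an illustrative sketch only (the block size 2 and the counts 3 and 1 below are placeholders, not recommended values; m, n, M, N stand for the usual local and global sizes), a typical call sequence is

.vb
     Mat A;
     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetSizes(A,m,n,M,N);        /* local and global sizes, each divisible by the block size */
     MatSetType(A,MATMPISBAIJ);
     MatSetFromOptions(A);
     MatMPISBAIJSetPreallocation(A,2,3,PETSC_NULL,1,PETSC_NULL);
.ve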
1854 1855 If the *_nnz parameter is given then the *_nz parameter is ignored. 1856 1857 Storage Information: 1858 For a square global matrix we define each processor's diagonal portion 1859 to be its local rows and the corresponding columns (a square submatrix); 1860 each processor's off-diagonal portion encompasses the remainder of the 1861 local matrix (a rectangular submatrix). 1862 1863 The user can specify preallocated storage for the diagonal part of 1864 the local submatrix with either d_nz or d_nnz (not both). Set 1865 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1866 memory allocation. Likewise, specify preallocated storage for the 1867 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1868 1869 You can call MatGetInfo() to get information on how effective the preallocation was; 1870 for example, the fields mallocs, nz_allocated, nz_used, and nz_unneeded. 1871 You can also run with the option -info and look for messages with the string 1872 malloc in them to see if additional memory allocation was needed. 1873 1874 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1875 the figure below we depict these three local rows and all columns (0-11). 1876 1877 .vb 1878 0 1 2 3 4 5 6 7 8 9 10 11 1879 ------------------- 1880 row 3 | o o o d d d o o o o o o 1881 row 4 | o o o d d d o o o o o o 1882 row 5 | o o o d d d o o o o o o 1883 ------------------- 1884 .ve 1885 1886 Thus, any entries in the d locations are stored in the d (diagonal) 1887 submatrix, and any entries in the o locations are stored in the 1888 o (off-diagonal) submatrix. Note that the d matrix is stored in 1889 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 1890 1891 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 1892 plus the diagonal part of the d matrix, 1893 and o_nz should indicate the number of block nonzeros per row in the o matrix. 1894 In general, for PDE problems in which most nonzeros are near the diagonal, 1895 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1896 or you will get TERRIBLE performance; see the users' manual chapter on 1897 matrices. 1898 1899 Level: intermediate 1900 1901 .keywords: matrix, block, aij, compressed row, sparse, parallel 1902 1903 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1904 @*/ 1905 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 1906 { 1907 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 1908 1909 PetscFunctionBegin; 1910 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 1911 if (f) { 1912 ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 1913 } 1914 PetscFunctionReturn(0); 1915 } 1916 1917 #undef __FUNCT__ 1918 #define __FUNCT__ "MatCreateMPISBAIJ" 1919 /*@C 1920 MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format 1921 (block compressed row). For good matrix assembly performance 1922 the user should preallocate the matrix storage by setting the parameters 1923 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1924 performance can be increased by more than a factor of 50. 1925 1926 Collective on MPI_Comm 1927 1928 Input Parameters: 1929 + comm - MPI communicator 1930 . bs - size of block 1931 . 
m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1932 This value should be the same as the local size used in creating the 1933 y vector for the matrix-vector product y = Ax. 1934 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1935 This value should be the same as the local size used in creating the 1936 x vector for the matrix-vector product y = Ax. 1937 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 1938 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 1939 . d_nz - number of block nonzeros per block row in diagonal portion of local 1940 submatrix (same for all local rows) 1941 . d_nnz - array containing the number of block nonzeros in the various block rows 1942 in the upper triangular and diagonal part of the diagonal portion of the local 1943 submatrix (possibly different for each block row) or PETSC_NULL. 1944 You must leave room for the diagonal entry even if it is zero. 1945 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1946 submatrix (same for all local rows). 1947 - o_nnz - array containing the number of block nonzeros in the various block rows of the 1948 off-diagonal portion of the local submatrix (possibly different for 1949 each block row) or PETSC_NULL. 1950 1951 Output Parameter: 1952 . A - the matrix 1953 1954 Options Database Keys: 1955 . -mat_no_unroll - uses code that does not unroll the loops in the 1956 block calculations (much slower) 1957 . -mat_block_size - size of the blocks to use 1958 . -mat_mpi - use the parallel matrix data structures even on one processor 1959 (defaults to using SeqSBAIJ format on one processor) 1960 1961 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 1962 MatXXXXSetPreallocation() paradigm instead of this routine directly. 1963 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 1964 1965 Notes: 1966 The number of rows and columns must be divisible by blocksize. 1967 This matrix type does not support complex Hermitian operations. 1968 1969 The user MUST specify either the local or global matrix dimensions 1970 (possibly both). 1971 1972 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1973 then it must be used on all processors that share the object for that argument. 1974 1975 If the *_nnz parameter is given then the *_nz parameter is ignored. 1976 1977 Storage Information: 1978 For a square global matrix we define each processor's diagonal portion 1979 to be its local rows and the corresponding columns (a square submatrix); 1980 each processor's off-diagonal portion encompasses the remainder of the 1981 local matrix (a rectangular submatrix). 1982 1983 The user can specify preallocated storage for the diagonal part of 1984 the local submatrix with either d_nz or d_nnz (not both). Set 1985 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1986 memory allocation. Likewise, specify preallocated storage for the 1987 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1988 1989 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1990 the figure below we depict these three local rows and all columns (0-11).
1991 1992 .vb 1993 0 1 2 3 4 5 6 7 8 9 10 11 1994 ------------------- 1995 row 3 | o o o d d d o o o o o o 1996 row 4 | o o o d d d o o o o o o 1997 row 5 | o o o d d d o o o o o o 1998 ------------------- 1999 .ve 2000 2001 Thus, any entries in the d locations are stored in the d (diagonal) 2002 submatrix, and any entries in the o locations are stored in the 2003 o (off-diagonal) submatrix. Note that the d matrix is stored in 2004 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 2005 2006 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 2007 plus the diagonal part of the d matrix, 2008 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2009 In general, for PDE problems in which most nonzeros are near the diagonal, 2010 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2011 or you will get TERRIBLE performance; see the users' manual chapter on 2012 matrices. 2013 2014 Level: intermediate 2015 2016 .keywords: matrix, block, aij, compressed row, sparse, parallel 2017 2018 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2019 @*/ 2020 2021 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPISBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2022 { 2023 PetscErrorCode ierr; 2024 PetscMPIInt size; 2025 2026 PetscFunctionBegin; 2027 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2028 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2029 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2030 if (size > 1) { 2031 ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr); 2032 ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2033 } else { 2034 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 2035 ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2036 } 2037 PetscFunctionReturn(0); 2038 } 2039 2040 2041 #undef __FUNCT__ 2042 #define __FUNCT__ "MatDuplicate_MPISBAIJ" 2043 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2044 { 2045 Mat mat; 2046 Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data; 2047 PetscErrorCode ierr; 2048 PetscInt len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs; 2049 PetscScalar *array; 2050 2051 PetscFunctionBegin; 2052 *newmat = 0; 2053 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 2054 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2055 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2056 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2057 ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2058 ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2059 2060 mat->factortype = matin->factortype; 2061 mat->preallocated = PETSC_TRUE; 2062 mat->assembled = PETSC_TRUE; 2063 mat->insertmode = NOT_SET_VALUES; 2064 2065 a = (Mat_MPISBAIJ*)mat->data; 2066 a->bs2 = oldmat->bs2; 2067 a->mbs = oldmat->mbs; 2068 a->nbs = oldmat->nbs; 2069 a->Mbs = oldmat->Mbs; 2070 a->Nbs = oldmat->Nbs; 2071 2072 2073 a->size = oldmat->size; 2074 a->rank = oldmat->rank; 2075 a->donotstash = oldmat->donotstash; 2076 a->roworiented = oldmat->roworiented; 2077 a->rowindices = 0; 2078 a->rowvalues = 0; 2079 a->getrowactive = PETSC_FALSE; 2080 a->barray = 0; 2081 a->rstartbs = oldmat->rstartbs; 2082 a->rendbs = oldmat->rendbs; 2083 a->cstartbs = oldmat->cstartbs; 2084 
a->cendbs = oldmat->cendbs; 2085 2086 /* hash table stuff */ 2087 a->ht = 0; 2088 a->hd = 0; 2089 a->ht_size = 0; 2090 a->ht_flag = oldmat->ht_flag; 2091 a->ht_fact = oldmat->ht_fact; 2092 a->ht_total_ct = 0; 2093 a->ht_insert_ct = 0; 2094 2095 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr); 2096 if (oldmat->colmap) { 2097 #if defined (PETSC_USE_CTABLE) 2098 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2099 #else 2100 ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2101 ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2102 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2103 #endif 2104 } else a->colmap = 0; 2105 2106 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2107 ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2108 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2109 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 2110 } else a->garray = 0; 2111 2112 ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 2113 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2114 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2115 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2116 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2117 2118 ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr); 2119 ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr); 2120 ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr); 2121 ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr); 2122 2123 ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr); 2124 ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr); 2125 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr); 2126 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr); 2127 ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr); 2128 ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr); 2129 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr); 2130 ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr); 2131 ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr); 2132 ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr); 2133 ierr = PetscLogObjectParent(mat,a->slvec0b);CHKERRQ(ierr); 2134 ierr = PetscLogObjectParent(mat,a->slvec1a);CHKERRQ(ierr); 2135 ierr = PetscLogObjectParent(mat,a->slvec1b);CHKERRQ(ierr); 2136 2137 /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */ 2138 ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr); 2139 a->sMvctx = oldmat->sMvctx; 2140 ierr = PetscLogObjectParent(mat,a->sMvctx);CHKERRQ(ierr); 2141 2142 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2143 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2144 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2145 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2146 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2147 *newmat = mat; 2148 PetscFunctionReturn(0); 2149 } 2150 2151 #undef __FUNCT__ 2152 #define __FUNCT__ "MatLoad_MPISBAIJ" 2153 PetscErrorCode MatLoad_MPISBAIJ(PetscViewer viewer, Mat newmat) 2154 { 2155 PetscErrorCode ierr; 2156 PetscInt i,nz,j,rstart,rend; 
2157 PetscScalar *vals,*buf; 2158 MPI_Comm comm = ((PetscObject)viewer)->comm; 2159 MPI_Status status; 2160 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,*locrowlens,mmbs; 2161 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2162 PetscInt *procsnz = 0,jj,*mycols,*ibuf; 2163 PetscInt bs=1,Mbs,mbs,extra_rows; 2164 PetscInt *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2165 PetscInt dcount,kmax,k,nzcount,tmp,sizesset=1,grows,gcols; 2166 int fd; 2167 2168 PetscFunctionBegin; 2169 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr); 2170 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 2171 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2172 2173 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2174 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2175 if (!rank) { 2176 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2177 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2178 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2179 if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ"); 2180 } 2181 2182 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0; 2183 2184 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2185 M = header[1]; N = header[2]; 2186 2187 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 2188 if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M; 2189 if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N; 2190 2191 /* If global sizes are set, check if they are consistent with that given in the file */ 2192 if (sizesset) { 2193 ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr); 2194 } 2195 if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 2196 if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 2197 2198 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 2199 2200 /* 2201 This code adds extra rows to make sure the number of rows is 2202 divisible by the blocksize 2203 */ 2204 Mbs = M/bs; 2205 extra_rows = bs - M + bs*(Mbs); 2206 if (extra_rows == bs) extra_rows = 0; 2207 else Mbs++; 2208 if (extra_rows &&!rank) { 2209 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 2210 } 2211 2212 /* determine ownership of all rows */ 2213 if (newmat->rmap->n < 0) { /* PETSC_DECIDE */ 2214 mbs = Mbs/size + ((Mbs % size) > rank); 2215 m = mbs*bs; 2216 } else { /* User Set */ 2217 m = newmat->rmap->n; 2218 mbs = m/bs; 2219 } 2220 ierr = PetscMalloc2(size+1,PetscMPIInt,&rowners,size+1,PetscMPIInt,&browners);CHKERRQ(ierr); 2221 mmbs = PetscMPIIntCast(mbs); 2222 ierr = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2223 rowners[0] = 0; 2224 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2225 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2226 rstart = rowners[rank]; 2227 rend = rowners[rank+1]; 2228 2229 /* distribute row lengths to all processors */ 2230 ierr = 
PetscMalloc((rend-rstart)*bs*sizeof(PetscMPIInt),&locrowlens);CHKERRQ(ierr); 2231 if (!rank) { 2232 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2233 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2234 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2235 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr); 2236 for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2237 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2238 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2239 } else { 2240 ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2241 } 2242 2243 if (!rank) { /* procs[0] */ 2244 /* calculate the number of nonzeros on each processor */ 2245 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2246 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2247 for (i=0; i<size; i++) { 2248 for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 2249 procsnz[i] += rowlengths[j]; 2250 } 2251 } 2252 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2253 2254 /* determine max buffer needed and allocate it */ 2255 maxnz = 0; 2256 for (i=0; i<size; i++) { 2257 maxnz = PetscMax(maxnz,procsnz[i]); 2258 } 2259 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2260 2261 /* read in my part of the matrix column indices */ 2262 nz = procsnz[0]; 2263 ierr = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 2264 mycols = ibuf; 2265 if (size == 1) nz -= extra_rows; 2266 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2267 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2268 2269 /* read in every ones (except the last) and ship off */ 2270 for (i=1; i<size-1; i++) { 2271 nz = procsnz[i]; 2272 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2273 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2274 } 2275 /* read in the stuff for the last proc */ 2276 if (size != 1) { 2277 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2278 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2279 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2280 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 2281 } 2282 ierr = PetscFree(cols);CHKERRQ(ierr); 2283 } else { /* procs[i], i>0 */ 2284 /* determine buffer space needed for message */ 2285 nz = 0; 2286 for (i=0; i<m; i++) { 2287 nz += locrowlens[i]; 2288 } 2289 ierr = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 2290 mycols = ibuf; 2291 /* receive message of column indices*/ 2292 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2293 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2294 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2295 } 2296 2297 /* loop over local rows, determining number of off diagonal entries */ 2298 ierr = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr); 2299 ierr = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr); 2300 ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2301 ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2302 ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2303 rowcount = 0; 2304 nzcount = 0; 2305 for (i=0; i<mbs; i++) { 2306 dcount = 0; 2307 odcount = 0; 2308 for (j=0; j<bs; j++) 
{ 2309 kmax = locrowlens[rowcount]; 2310 for (k=0; k<kmax; k++) { 2311 tmp = mycols[nzcount++]/bs; /* block col. index */ 2312 if (!mask[tmp]) { 2313 mask[tmp] = 1; 2314 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */ 2315 else masked1[dcount++] = tmp; /* entry in diag portion */ 2316 } 2317 } 2318 rowcount++; 2319 } 2320 2321 dlens[i] = dcount; /* d_nzz[i] */ 2322 odlens[i] = odcount; /* o_nzz[i] */ 2323 2324 /* zero out the mask elements we set */ 2325 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2326 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2327 } 2328 if (!sizesset) { 2329 ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 2330 } 2331 ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 2332 ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr); 2333 2334 if (!rank) { 2335 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2336 /* read in my part of the matrix numerical values */ 2337 nz = procsnz[0]; 2338 vals = buf; 2339 mycols = ibuf; 2340 if (size == 1) nz -= extra_rows; 2341 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2342 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2343 2344 /* insert into matrix */ 2345 jj = rstart*bs; 2346 for (i=0; i<m; i++) { 2347 ierr = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2348 mycols += locrowlens[i]; 2349 vals += locrowlens[i]; 2350 jj++; 2351 } 2352 2353 /* read in other processors (except the last one) and ship out */ 2354 for (i=1; i<size-1; i++) { 2355 nz = procsnz[i]; 2356 vals = buf; 2357 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2358 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 2359 } 2360 /* the last proc */ 2361 if (size != 1){ 2362 nz = procsnz[i] - extra_rows; 2363 vals = buf; 2364 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2365 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2366 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 2367 } 2368 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2369 2370 } else { 2371 /* receive numeric values */ 2372 ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2373 2374 /* receive message of values*/ 2375 vals = buf; 2376 mycols = ibuf; 2377 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr); 2378 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2379 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2380 2381 /* insert into matrix */ 2382 jj = rstart*bs; 2383 for (i=0; i<m; i++) { 2384 ierr = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2385 mycols += locrowlens[i]; 2386 vals += locrowlens[i]; 2387 jj++; 2388 } 2389 } 2390 2391 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2392 ierr = PetscFree(buf);CHKERRQ(ierr); 2393 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2394 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 2395 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 2396 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 2397 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2398 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2399 PetscFunctionReturn(0); 2400 } 2401 2402 #undef __FUNCT__ 2403 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor" 2404 /*XXXXX@ 2405 
MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2406 2407 Input Parameters: 2408 . mat - the matrix 2409 . fact - factor 2410 2411 Collective on Mat 2412 2413 Level: advanced 2414 2415 Notes: 2416 This can also be set by the command line option: -mat_use_hash_table fact 2417 2418 .keywords: matrix, hashtable, factor, HT 2419 2420 .seealso: MatSetOption() 2421 @XXXXX*/ 2422 2423 2424 #undef __FUNCT__ 2425 #define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ" 2426 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[]) 2427 { 2428 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 2429 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(a->B)->data; 2430 PetscReal atmp; 2431 PetscReal *work,*svalues,*rvalues; 2432 PetscErrorCode ierr; 2433 PetscInt i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol; 2434 PetscMPIInt rank,size; 2435 PetscInt *rowners_bs,dest,count,source; 2436 PetscScalar *va; 2437 MatScalar *ba; 2438 MPI_Status stat; 2439 2440 PetscFunctionBegin; 2441 if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov"); 2442 ierr = MatGetRowMaxAbs(a->A,v,PETSC_NULL);CHKERRQ(ierr); 2443 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2444 2445 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 2446 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 2447 2448 bs = A->rmap->bs; 2449 mbs = a->mbs; 2450 Mbs = a->Mbs; 2451 ba = b->a; 2452 bi = b->i; 2453 bj = b->j; 2454 2455 /* find ownerships */ 2456 rowners_bs = A->rmap->range; 2457 2458 /* each proc creates an array to be distributed */ 2459 ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr); 2460 ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr); 2461 2462 /* row_max for B */ 2463 if (rank != size-1){ 2464 for (i=0; i<mbs; i++) { 2465 ncols = bi[1] - bi[0]; bi++; 2466 brow = bs*i; 2467 for (j=0; j<ncols; j++){ 2468 bcol = bs*(*bj); 2469 for (kcol=0; kcol<bs; kcol++){ 2470 col = bcol + kcol; /* local col index */ 2471 col += rowners_bs[rank+1]; /* global col index */ 2472 for (krow=0; krow<bs; krow++){ 2473 atmp = PetscAbsScalar(*ba); ba++; 2474 row = brow + krow; /* local row index */ 2475 if (PetscRealPart(va[row]) < atmp) va[row] = atmp; 2476 if (work[col] < atmp) work[col] = atmp; 2477 } 2478 } 2479 bj++; 2480 } 2481 } 2482 2483 /* send values to its owners */ 2484 for (dest=rank+1; dest<size; dest++){ 2485 svalues = work + rowners_bs[dest]; 2486 count = rowners_bs[dest+1]-rowners_bs[dest]; 2487 ierr = MPI_Send(svalues,count,MPIU_REAL,dest,rank,((PetscObject)A)->comm);CHKERRQ(ierr); 2488 } 2489 } 2490 2491 /* receive values */ 2492 if (rank){ 2493 rvalues = work; 2494 count = rowners_bs[rank+1]-rowners_bs[rank]; 2495 for (source=0; source<rank; source++){ 2496 ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,((PetscObject)A)->comm,&stat);CHKERRQ(ierr); 2497 /* process values */ 2498 for (i=0; i<count; i++){ 2499 if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i]; 2500 } 2501 } 2502 } 2503 2504 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2505 ierr = PetscFree(work);CHKERRQ(ierr); 2506 PetscFunctionReturn(0); 2507 } 2508 2509 #undef __FUNCT__ 2510 #define __FUNCT__ "MatSOR_MPISBAIJ" 2511 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2512 { 2513 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2514 PetscErrorCode ierr; 2515 PetscInt mbs=mat->mbs,bs=matin->rmap->bs; 2516 PetscScalar *x,*ptr,*from; 2517 Vec bb1; 2518 const 
PetscScalar *b; 2519 2520 PetscFunctionBegin; 2521 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 2522 if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2523 2524 if (flag == SOR_APPLY_UPPER) { 2525 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2526 PetscFunctionReturn(0); 2527 } 2528 2529 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2530 if ( flag & SOR_ZERO_INITIAL_GUESS ) { 2531 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2532 its--; 2533 } 2534 2535 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2536 while (its--){ 2537 2538 /* lower triangular part: slvec0b = - B^T*xx */ 2539 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2540 2541 /* copy xx into slvec0a */ 2542 ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2543 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2544 ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2545 ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2546 2547 ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr); 2548 2549 /* copy bb into slvec1a */ 2550 ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2551 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 2552 ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2553 ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2554 2555 /* set slvec1b = 0 */ 2556 ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr); 2557 2558 ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2559 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2560 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 2561 ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2562 2563 /* upper triangular part: bb1 = bb1 - B*x */ 2564 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr); 2565 2566 /* local diagonal sweep */ 2567 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2568 } 2569 ierr = VecDestroy(bb1);CHKERRQ(ierr); 2570 } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)){ 2571 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2572 } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)){ 2573 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2574 } else if (flag & SOR_EISENSTAT) { 2575 Vec xx1; 2576 PetscTruth hasop; 2577 const PetscScalar *diag; 2578 PetscScalar *sl,scale = (omega - 2.0)/omega; 2579 PetscInt i,n; 2580 2581 if (!mat->xx1) { 2582 ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr); 2583 ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr); 2584 } 2585 xx1 = mat->xx1; 2586 bb1 = mat->bb1; 2587 2588 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 2589 2590 if (!mat->diag) { 2591 /* this is wrong for same matrix with new nonzero values */ 2592 ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr); 2593 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 2594 } 2595 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 2596 2597 if (hasop) { 2598 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 2599 ierr = 
VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr); 2600 } else { 2601 /* 2602 These two lines are replaced by code that may be a bit faster for a good compiler 2603 ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr); 2604 ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr); 2605 */ 2606 ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr); 2607 ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr); 2608 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 2609 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2610 ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr); 2611 if (omega == 1.0) { 2612 for (i=0; i<n; i++) { 2613 sl[i] = b[i] - diag[i]*x[i]; 2614 } 2615 ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr); 2616 } else { 2617 for (i=0; i<n; i++) { 2618 sl[i] = b[i] + scale*diag[i]*x[i]; 2619 } 2620 ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr); 2621 } 2622 ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr); 2623 ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr); 2624 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 2625 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2626 } 2627 2628 /* multiply off-diagonal portion of matrix */ 2629 ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr); 2630 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2631 ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr); 2632 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2633 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2634 ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr); 2635 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2636 ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2637 ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2638 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr); 2639 2640 /* local sweep */ 2641 ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 2642 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 2643 } else { 2644 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2645 } 2646 PetscFunctionReturn(0); 2647 } 2648 2649 #undef __FUNCT__ 2650 #define __FUNCT__ "MatSOR_MPISBAIJ_2comm" 2651 PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2652 { 2653 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2654 PetscErrorCode ierr; 2655 Vec lvec1,bb1; 2656 2657 PetscFunctionBegin; 2658 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 2659 if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2660 2661 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2662 if ( flag & SOR_ZERO_INITIAL_GUESS ) { 2663 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2664 its--; 2665 } 2666 2667 ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr); 2668 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2669 while (its--){ 2670 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2671 2672 /* lower diagonal part: bb1 = bb - B^T*xx */ 2673 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr); 2674 ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr); 2675 2676 ierr = 
VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2677 ierr = VecCopy(bb,bb1);CHKERRQ(ierr); 2678 ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2679 2680 /* upper diagonal part: bb1 = bb1 - B*x */ 2681 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2682 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 2683 2684 ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2685 2686 /* diagonal sweep */ 2687 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2688 } 2689 ierr = VecDestroy(lvec1);CHKERRQ(ierr); 2690 ierr = VecDestroy(bb1);CHKERRQ(ierr); 2691 } else { 2692 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2693 } 2694 PetscFunctionReturn(0); 2695 } 2696 2697