#define PETSCMAT_DLL

#include "../src/mat/impls/baij/mpi/mpibaij.h"   /*I  "petscmat.h"  I*/
#include "../src/mat/impls/sbaij/mpi/mpisbaij.h"
#include "../src/mat/impls/sbaij/seq/sbaij.h"
#include "petscblaslapack.h"

EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat);
EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat);
EXTERN PetscErrorCode DisAssemble_MPISBAIJ(Mat);
EXTERN PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt);
EXTERN PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar[]);
EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar[]);
EXTERN PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
EXTERN PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
EXTERN PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*);
EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar*);
EXTERN PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat,Vec,PetscInt[]);
EXTERN PetscErrorCode MatSOR_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec);

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPISBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
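
/*
   The two macros below insert a single scalar value into the local diagonal
   block A (SeqSBAIJ) or off-diagonal block B (SeqBAIJ).  They locate the
   target block column with a short binary search followed by a linear scan,
   add or overwrite the entry if the block already exists, and otherwise
   shift the rest of the row up and (re)allocate through
   MatSeqXAIJReallocateAIJ, honoring the nonew flag.
*/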
#define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix",row,col); \
    MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N >= _i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
    a_noinsert:; \
    ailen[brow] = nrow; \
}

#define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix",row,col); \
    MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N >= _i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
    b_noinsert:; \
    bilen[brow] = nrow; \
}
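
/*
   MatSetValues for the parallel symmetric format: entries whose global row
   lies in this process's ownership range go directly into A (diagonal
   block) or B (off-diagonal block) through the macros above; rows owned by
   other processes are stashed and communicated in MatAssemblyBegin/End().
*/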
/* Only add/insert a(i,j) with i<=j (blocks).
   Any a(i,j) with i>j input by the user is ignored.
*/
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPISBAIJ"
PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  MatScalar      value;
  PetscTruth     roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig=mat->cmap->rend,bs=mat->rmap->bs;

  /* Some variables required in the macro */
  Mat          A = baij->A;
  Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data;
  PetscInt     *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar    *aa=a->a;

  Mat         B = baij->B;
  Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data;
  PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba=b->a;

  PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar *ap,*bap;

  /* for stash */
  PetscInt  n_loc, *in_loc = PETSC_NULL;
  MatScalar *v_loc = PETSC_NULL;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  if (!baij->donotstash) {
    if (n > baij->n_loc) {
      ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
      ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
      ierr = PetscMalloc(n*sizeof(PetscInt),&baij->in_loc);CHKERRQ(ierr);
      ierr = PetscMalloc(n*sizeof(MatScalar),&baij->v_loc);CHKERRQ(ierr);
      baij->n_loc = n;
    }
    in_loc = baij->in_loc;
    v_loc  = baij->v_loc;
  }

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
      row = im[i] - rstart_orig;                     /* local row index */
      for (j=0; j<n; j++) {
        if (im[i]/bs > in[j]/bs) {
          if (a->ignore_ltriangular) {
            continue; /* ignore lower triangular blocks */
          } else {
            SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
          }
        }
        if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */
          col  = in[j] - cstart_orig;                    /* local col index */
          brow = row/bs; bcol = col/bs;
          if (brow > bcol) continue; /* ignore lower triangular blocks of A */
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else { /* off-diag entry (B) */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B     = baij->B;
              b     = (Mat_SeqBAIJ*)(B)->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              ba    = b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else { /* off processor entry */
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        n_loc = 0;
        for (j=0; j<n; j++) {
          if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
          in_loc[n_loc] = in[j];
          if (roworiented) {
            v_loc[n_loc] = v[i*n+j];
          } else {
            v_loc[n_loc] = v[j*m+i];
          }
          n_loc++;
        }
        ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}
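
/*
   Block-oriented MatSetValues: each (im[i],in[j]) pair addresses a full
   bs x bs block.  Unless the input layout already matches, the block is
   packed into the contiguous bs2 scratch buffer baij->barray before being
   handed to the sequential blocked insertion routines; off-process blocks
   go into the block stash.
*/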
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray=baij->barray;
  PetscTruth      roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular;
  PetscErrorCode  ierr;
  PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt        rend=baij->rendbs,cstart=baij->rstartbs,stepval;
  PetscInt        cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        if (im[i] > in[j]) {
          if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
          else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
        }
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++ = *value++;
            }
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined (PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            }
          } else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
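
/*
   MatGetValues: only locally owned entries may be queried.  Diagonal-block
   entries are read from A directly; off-diagonal entries are translated
   through colmap/garray, returning 0.0 for positions not stored in B.
*/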
#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPISBAIJ"
PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          col  = idxn[j] - bscstart;
          ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}
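
/*
   MatNorm: since only the upper triangle is stored, the Frobenius norm is
   assembled as sqrt(||A_loc||_F^2 + 2*||B_loc||_F^2) reduced over all
   processes (each stored off-diagonal block appears twice in the full
   matrix).  The 1- and infinity-norms coincide for a symmetric matrix and
   are computed from global column sums, crediting each stored off-diagonal
   entry to both its row and its column.
*/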
#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPISBAIJ"
PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscReal      sum[2],*lnorm2;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
      ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of norm(A) */
      ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;            /* square of norm(B) */
      ierr  = MPI_Allreduce(lnorm2,sum,2,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *norm = sqrt(sum[0] + 2*sum[1]);
      ierr  = PetscFree(lnorm2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
      Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
      Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
      PetscReal    *rsum,*rsum2,vabs;
      PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
      PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
      MatScalar    *v;

      ierr = PetscMalloc2(mat->cmap->N,PetscReal,&rsum,mat->cmap->N,PetscReal,&rsum2);CHKERRQ(ierr);
      ierr = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      /* Amat */
      v = amat->a; jj = amat->j;
      for (brow=0; brow<mbs; brow++) {
        grow = bs*(rstart + brow);
        nz   = amat->i[brow+1] - amat->i[brow];
        for (bcol=0; bcol<nz; bcol++) {
          gcol = bs*(rstart + *jj); jj++;
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              vabs = PetscAbsScalar(*v); v++;
              rsum[gcol+col] += vabs;
              /* non-diagonal block */
              if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
            }
          }
        }
      }
      /* Bmat */
      v = bmat->a; jj = bmat->j;
      for (brow=0; brow<mbs; brow++) {
        grow = bs*(rstart + brow);
        nz   = bmat->i[brow+1] - bmat->i[brow];
        for (bcol=0; bcol<nz; bcol++) {
          gcol = bs*garray[*jj]; jj++;
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              vabs = PetscAbsScalar(*v); v++;
              rsum[gcol+col] += vabs;
              rsum[grow+row] += vabs;
            }
          }
        }
      }
      ierr  = MPI_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *norm = 0.0;
      for (col=0; col<mat->cmap->N; col++) {
        if (rsum2[col] > *norm) *norm = rsum2[col];
      }
      ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (baij->donotstash || mat->nooffprocentries) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
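
/*
   MatAssemblyEnd drains the scalar stash and the block stash received from
   other processes (replaying the entries through the local MatSetValues
   routines), assembles A and B, disassembles if any process had to, and on
   final assembly builds the scatter contexts used by MatMult.
*/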
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
  Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscTruth     other_disassembled;
  PetscMPIInt    n;
  PetscTruth     r1,r2,r3;
  MatScalar      *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;

  if (!baij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash.  Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);

    baij->roworiented = r1;
    a->roworiented    = r2;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
  }
  ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  baij->rowvalues = 0;

  PetscFunctionReturn(0);
}

extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
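
/*
   ASCII/draw/socket viewer: the INFO formats report per-process nonzero
   counts; for a full view the matrix is assembled onto the first process
   and its local diagonal block is viewed through a singleton viewer.
*/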
#undef __FUNCT__
#define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscInt          bs = mat->rmap->bs;
  PetscMPIInt       size = baij->size,rank = baij->rank;
  PetscTruth        iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
             rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    Mat_SeqSBAIJ *Aloc;
    Mat_SeqBAIJ  *Bloc;
    PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar    *a;

    /* Should this be the same type as mat? */
    ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqSBAIJ*)baij->A->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Bloc = (Mat_SeqBAIJ*)baij->B->data;
    ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPISBAIJ"
PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii || isdraw || issocket || isbinary) {
    ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPISBAIJ"
PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  if (baij->A) {ierr = MatDestroy(baij->A);CHKERRQ(ierr);}
  if (baij->B) {ierr = MatDestroy(baij->B);CHKERRQ(ierr);}
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = PetscTableDestroy(baij->colmap);CHKERRQ(ierr);}
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  if (baij->lvec)  {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
  if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
  if (baij->slvec0) {
    ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr);
    ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr);
  }
  if (baij->slvec1) {
    ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr);
    ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr);
    ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr);
  }
  if (baij->sMvctx) {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);}
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree(baij->hd);CHKERRQ(ierr);
  if (baij->diag) {ierr = VecDestroy(baij->diag);CHKERRQ(ierr);}
  if (baij->bb1)  {ierr = VecDestroy(baij->bb1);CHKERRQ(ierr);}
  if (baij->xx1)  {ierr = VecDestroy(baij->xx1);CHKERRQ(ierr);}
#if defined(PETSC_USE_SCALAR_MAT_SINGLE)
  ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
  ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(baij);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
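
/*
   MatMult for the symmetric format computes y = A*x + B*xghost + B^T*x.
   The lower-triangular contribution B^T*x is formed locally into slvec0b,
   packed behind a copy of x in slvec0, and a single forward scatter with
   ADD_VALUES both gathers the ghost values into slvec1b and accumulates the
   remote B^T*x contributions into slvec1a (which already holds A*x); the
   final multadd applies B to the gathered ghosts.
*/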
#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ_Hermitian"
PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar    *x,*from;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");

  /* diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);

  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ"
PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar    *x,*from;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");

  /* diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);

  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ_2comm"
PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");

  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");

  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* do diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  /* do superdiagonal part */
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  /* do subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPISBAIJ"
PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar    *x,*from,zero=0.0;

  PetscFunctionBegin;
  /*
  PetscSynchronizedPrintf(((PetscObject)A)->comm," MatMultAdd is called ...\n");
  PetscSynchronizedFlush(((PetscObject)A)->comm);
  */
  /* diagonal part */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* do diagonal part */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* do superdiagonal part */
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);

  /* do subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPISBAIJ"
PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
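
/*
   MatGetRow: merges one local row of A and B into a single sorted row with
   global column indices, mapping B's block columns back through garray.
   Work arrays sized for the longest local row are allocated on first use
   and freed at the next assembly.
*/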
#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPISBAIJ"
PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
    Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt     max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr);
  }

  if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
  lrow = row - brstart; /* local row index */

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPISBAIJ"
PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;

  PetscFunctionBegin;
  if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ"
PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;

  PetscFunctionBegin;
  aA->getrow_utriangular = PETSC_TRUE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ"
PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;

  PetscFunctionBegin;
  aA->getrow_utriangular = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRealPart_MPISBAIJ"
PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  ierr = MatRealPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatImaginaryPart_MPISBAIJ"
PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPISBAIJ"
PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPISBAIJ"
PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
  Mat            A = a->A,B = a->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = (PetscReal)matin->rmap->bs;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else {
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}
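
/*
   MatSetOption: most options are forwarded to the sequential blocks A and
   B.  MAT_HERMITIAN switches MatMult to the Hermitian variant; the
   lower-triangular and getrow options are flags on the SeqSBAIJ diagonal
   block.
*/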
#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPISBAIJ"
PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscTruth flg)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = flg;
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    a->ht_flag = flg;
    break;
  case MAT_HERMITIAN:
    if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first");
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    A->ops->mult = MatMult_MPISBAIJ_Hermitian;
    break;
  case MAT_SPD:
    A->spd_set = PETSC_TRUE;
    A->spd     = flg;
    if (flg) {
      A->symmetric                  = PETSC_TRUE;
      A->structurally_symmetric     = PETSC_TRUE;
      A->symmetric_set              = PETSC_TRUE;
      A->structurally_symmetric_set = PETSC_TRUE;
    }
    break;
  case MAT_SYMMETRIC:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_STRUCTURALLY_SYMMETRIC:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_SYMMETRY_ETERNAL:
    if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_LOWER_TRIANGULAR:
    aA->ignore_ltriangular = flg;
    break;
  case MAT_ERROR_LOWER_TRIANGULAR:
    aA->ignore_ltriangular = flg;
    break;
  case MAT_GETROW_UPPERTRIANGULAR:
    aA->getrow_utriangular = flg;
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPISBAIJ"
PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX || *B != A) {
    ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
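
/*
   MatDiagonalScale: symmetry is preserved only if the left and right
   scaling vectors are identical, so ll and rr are required to be equal;
   B is scaled by ll on the left and by the gathered ghost entries of rr
   on the right.
*/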
#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  Mat            a=baij->A, b=baij->B;
  PetscErrorCode ierr;
  PetscInt       nv,m,n;
  PetscTruth     flg;

  PetscFunctionBegin;
  if (ll != rr) {
    ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
    if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
  }
  if (!ll) PetscFunctionReturn(0);

  ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
  if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n);

  ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
  if (nv != n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");

  ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* left diagonalscale the off-diagonal part */
  ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);

  /* scale the diagonal part */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  /* right diagonalscale the off-diagonal part */
  ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*);

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPISBAIJ"
PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag)
{
  Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
  Mat            a,b,c,d;
  PetscTruth     flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatCopy_MPISBAIJ"
PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)B->data;

  PetscFunctionBegin;
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
    ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
    ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
  } else {
    ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
    ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ"
PetscErrorCode MatSetUpPreallocation_MPISBAIJ(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPISBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAXPY_MPISBAIJ"
PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data;
  PetscBLASInt   bnz,one=1;
  Mat_SeqSBAIJ   *xa,*ya;
  Mat_SeqBAIJ    *xb,*yb;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    xa  = (Mat_SeqSBAIJ*)xx->A->data;
    ya  = (Mat_SeqSBAIJ*)yy->A->data;
    bnz = PetscBLASIntCast(xa->nz);
    BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one);
    xb  = (Mat_SeqBAIJ*)xx->B->data;
    yb  = (Mat_SeqBAIJ*)yy->B->data;
    bnz = PetscBLASIntCast(xb->nz);
    BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one);
  } else {
    ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
    ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
    ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetBlockSize_MPISBAIJ"
PetscErrorCode MatSetBlockSize_MPISBAIJ(Mat A,PetscInt bs)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscInt       rbs,cbs;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
  ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
  ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr);
  ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr);
  if (rbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with SBAIJ %d",bs,rbs);
  if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with SBAIJ %d",bs,cbs);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
{
  PetscErrorCode ierr;
  PetscInt       i;
  PetscTruth     flg;

  PetscFunctionBegin;
  for (i=0; i<n; i++) {
    ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
    if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices");
  }
  ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}


/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = {
       MatSetValues_MPISBAIJ,
       MatGetRow_MPISBAIJ,
       MatRestoreRow_MPISBAIJ,
       MatMult_MPISBAIJ,
/* 4*/ MatMultAdd_MPISBAIJ,
       MatMult_MPISBAIJ,       /* transpose versions are same as non-transpose */
       MatMultAdd_MPISBAIJ,
       0,
       0,
       0,
/*10*/ 0,
       0,
       0,
       MatSOR_MPISBAIJ,
       MatTranspose_MPISBAIJ,
/*15*/ MatGetInfo_MPISBAIJ,
       MatEqual_MPISBAIJ,
       MatGetDiagonal_MPISBAIJ,
       MatDiagonalScale_MPISBAIJ,
       MatNorm_MPISBAIJ,
/*20*/ MatAssemblyBegin_MPISBAIJ,
       MatAssemblyEnd_MPISBAIJ,
       MatSetOption_MPISBAIJ,
       MatZeroEntries_MPISBAIJ,
/*24*/ 0,
       0,
       0,
       0,
       0,
/*29*/ MatSetUpPreallocation_MPISBAIJ,
       0,
       0,
       0,
       0,
/*34*/ MatDuplicate_MPISBAIJ,
       0,
       0,
       0,
       0,
/*39*/ MatAXPY_MPISBAIJ,
       MatGetSubMatrices_MPISBAIJ,
       MatIncreaseOverlap_MPISBAIJ,
       MatGetValues_MPISBAIJ,
       MatCopy_MPISBAIJ,
/*44*/ 0,
       MatScale_MPISBAIJ,
       0,
       0,
       0,
/*49*/ MatSetBlockSize_MPISBAIJ,
       0,
       0,
       0,
       0,
/*54*/ 0,
       0,
       MatSetUnfactored_MPISBAIJ,
       0,
       MatSetValuesBlocked_MPISBAIJ,
/*59*/ 0,
       0,
       0,
       0,
       0,
/*64*/ 0,
       0,
       0,
       0,
       0,
/*69*/ MatGetRowMaxAbs_MPISBAIJ,
       0,
       0,
       0,
       0,
/*74*/ 0,
       0,
       0,
       0,
       0,
/*79*/ 0,
       0,
       0,
       0,
       MatLoad_MPISBAIJ,
/*84*/ 0,
       0,
       0,
       0,
       0,
/*89*/ 0,
       0,
       0,
       0,
       0,
/*94*/ 0,
       0,
       0,
       0,
       0,
/*99*/ 0,
       0,
       0,
       0,
       0,
/*104*/0,
       MatRealPart_MPISBAIJ,
       MatImaginaryPart_MPISBAIJ,
       MatGetRowUpperTriangular_MPISBAIJ,
       MatRestoreRowUpperTriangular_MPISBAIJ,
/*109*/0,
       0,
       0,
       0,
       0,
/*114*/0,
       0,
       0,
       0,
       0,
/*119*/0,
       0,
       0,
       0
};


EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
{
  PetscFunctionBegin;
  *a      = ((Mat_MPISBAIJ*)A->data)->A;
  *iscopy = PETSC_FALSE;
  PetscFunctionReturn(0);
}
EXTERN_C_END
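
/*
   Preallocation: creates the sequential diagonal block A (SeqSBAIJ, upper
   triangle only) and off-diagonal block B (SeqBAIJ), sets up the block
   ownership ranges (rangebs, rstartbs/rendbs, cstartbs/cendbs), and
   forwards d_nz/d_nnz and o_nz/o_nnz to the two blocks.
*/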
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
{
  Mat_MPISBAIJ   *b;
  PetscErrorCode ierr;
  PetscInt       i,mbs,Mbs,newbs = PetscAbs(bs);

  PetscFunctionBegin;
  if (bs < 0) {
    ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPISBAIJ matrix","Mat");CHKERRQ(ierr);
    ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscOptionsEnd();CHKERRQ(ierr);
    bs   = PetscAbs(bs);
  }
  if ((d_nnz || o_nnz) && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz");
  bs = newbs;

  if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
  if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
  if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
  if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);

  B->rmap->bs = B->cmap->bs = bs;
  ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);

  if (d_nnz) {
    for (i=0; i<B->rmap->n/bs; i++) {
      if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]);
    }
  }
  if (o_nnz) {
    for (i=0; i<B->rmap->n/bs; i++) {
      if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]);
    }
  }

  b   = (Mat_MPISBAIJ*)B->data;
  mbs = B->rmap->n/bs;
  Mbs = B->rmap->N/bs;
  if (mbs*bs != B->rmap->n) {
    SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->n,bs);
  }

  B->rmap->bs = bs;
  b->bs2      = bs*bs;
  b->mbs      = mbs;
  b->nbs      = mbs;
  b->Mbs      = Mbs;
  b->Nbs      = Mbs;

  for (i=0; i<=b->size; i++) {
    b->rangebs[i] = B->rmap->range[i]/bs;
  }
  b->rstartbs = B->rmap->rstart/bs;
  b->rendbs   = B->rmap->rend/bs;

  b->cstartbs = B->cmap->rstart/bs;
  b->cendbs   = B->cmap->rend/bs;

  if (!B->preallocated) {
    ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
    ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
    ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
    ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
    ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
    ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
    ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr);
  }

  ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
  ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
  B->preallocated = PETSC_TRUE;
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#if defined(PETSC_HAVE_MUMPS)
extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_sbaij_mumps(Mat,MatFactorType,Mat*);
#endif
#if defined(PETSC_HAVE_SPOOLES)
extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_mpisbaij_spooles(Mat,MatFactorType,Mat*);
#endif
#if defined(PETSC_HAVE_PASTIX)
extern PetscErrorCode MatGetFactor_mpisbaij_pastix(Mat,MatFactorType,Mat*);
#endif
EXTERN_C_END
i<B->rmap->n/bs; i++) {
      if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
    }
  }
  if (o_nnz) {
    for (i=0; i<B->rmap->n/bs; i++) {
      if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
    }
  }

  b   = (Mat_MPISBAIJ*)B->data;
  mbs = B->rmap->n/bs;
  Mbs = B->rmap->N/bs;
  if (mbs*bs != B->rmap->n) {
    SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number of local rows %D must be divisible by blocksize %D",B->rmap->n,bs);
  }

  B->rmap->bs = bs;
  b->bs2      = bs*bs;
  b->mbs      = mbs;
  b->nbs      = mbs;
  b->Mbs      = Mbs;
  b->Nbs      = Mbs;

  for (i=0; i<=b->size; i++) {
    b->rangebs[i] = B->rmap->range[i]/bs;
  }
  b->rstartbs = B->rmap->rstart/bs;
  b->rendbs   = B->rmap->rend/bs;
  b->cstartbs = B->cmap->rstart/bs;
  b->cendbs   = B->cmap->rend/bs;

  if (!B->preallocated) {
    ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
    ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
    ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
    ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
    ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
    ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
    ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr);
  }

  ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
  ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
  B->preallocated = PETSC_TRUE;
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#if defined(PETSC_HAVE_MUMPS)
extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_sbaij_mumps(Mat,MatFactorType,Mat*);
#endif
#if defined(PETSC_HAVE_SPOOLES)
extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_mpisbaij_spooles(Mat,MatFactorType,Mat*);
#endif
#if defined(PETSC_HAVE_PASTIX)
extern PetscErrorCode MatGetFactor_mpisbaij_pastix(Mat,MatFactorType,Mat*);
#endif
EXTERN_C_END

/*MC
   MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
   based on the block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.

   Options Database Keys:
. 
-mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions() 1679 1680 Level: beginner 1681 1682 .seealso: MatCreateMPISBAIJ 1683 M*/ 1684 1685 EXTERN_C_BEGIN 1686 #undef __FUNCT__ 1687 #define __FUNCT__ "MatCreate_MPISBAIJ" 1688 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPISBAIJ(Mat B) 1689 { 1690 Mat_MPISBAIJ *b; 1691 PetscErrorCode ierr; 1692 PetscTruth flg; 1693 1694 PetscFunctionBegin; 1695 1696 ierr = PetscNewLog(B,Mat_MPISBAIJ,&b);CHKERRQ(ierr); 1697 B->data = (void*)b; 1698 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1699 1700 B->ops->destroy = MatDestroy_MPISBAIJ; 1701 B->ops->view = MatView_MPISBAIJ; 1702 B->mapping = 0; 1703 B->assembled = PETSC_FALSE; 1704 1705 B->insertmode = NOT_SET_VALUES; 1706 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 1707 ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr); 1708 1709 /* build local table of row and column ownerships */ 1710 ierr = PetscMalloc((b->size+2)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr); 1711 1712 /* build cache for off array entries formed */ 1713 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 1714 b->donotstash = PETSC_FALSE; 1715 b->colmap = PETSC_NULL; 1716 b->garray = PETSC_NULL; 1717 b->roworiented = PETSC_TRUE; 1718 1719 /* stuff used in block assembly */ 1720 b->barray = 0; 1721 1722 /* stuff used for matrix vector multiply */ 1723 b->lvec = 0; 1724 b->Mvctx = 0; 1725 b->slvec0 = 0; 1726 b->slvec0b = 0; 1727 b->slvec1 = 0; 1728 b->slvec1a = 0; 1729 b->slvec1b = 0; 1730 b->sMvctx = 0; 1731 1732 /* stuff for MatGetRow() */ 1733 b->rowindices = 0; 1734 b->rowvalues = 0; 1735 b->getrowactive = PETSC_FALSE; 1736 1737 /* hash table stuff */ 1738 b->ht = 0; 1739 b->hd = 0; 1740 b->ht_size = 0; 1741 b->ht_flag = PETSC_FALSE; 1742 b->ht_fact = 0; 1743 b->ht_total_ct = 0; 1744 b->ht_insert_ct = 0; 1745 1746 b->in_loc = 0; 1747 b->v_loc = 0; 1748 b->n_loc = 0; 1749 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr); 1750 ierr = PetscOptionsTruth("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 1751 if (flg) { 1752 PetscReal fact = 1.39; 1753 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 1754 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr); 1755 if (fact <= 1.0) fact = 1.39; 1756 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 1757 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 1758 } 1759 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1760 1761 #if defined(PETSC_HAVE_PASTIX) 1762 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C", 1763 "MatGetFactor_mpisbaij_pastix", 1764 MatGetFactor_mpisbaij_pastix);CHKERRQ(ierr); 1765 #endif 1766 #if defined(PETSC_HAVE_MUMPS) 1767 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", 1768 "MatGetFactor_sbaij_mumps", 1769 MatGetFactor_sbaij_mumps);CHKERRQ(ierr); 1770 #endif 1771 #if defined(PETSC_HAVE_SPOOLES) 1772 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C", 1773 "MatGetFactor_mpisbaij_spooles", 1774 MatGetFactor_mpisbaij_spooles);CHKERRQ(ierr); 1775 #endif 1776 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1777 "MatStoreValues_MPISBAIJ", 
MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                           "MatRetrieveValues_MPISBAIJ",
                                           MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
                                           "MatGetDiagonalBlock_MPISBAIJ",
                                           MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
                                           "MatMPISBAIJSetPreallocation_MPISBAIJ",
                                           MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
  B->symmetric                  = PETSC_TRUE;
  B->structurally_symmetric     = PETSC_TRUE;
  B->symmetric_set              = PETSC_TRUE;
  B->structurally_symmetric_set = PETSC_TRUE;
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*MC
   MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.

   This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
   and MATMPISBAIJ otherwise.

   Options Database Keys:
. -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPISBAIJ(), MATSEQSBAIJ, MATMPISBAIJ
M*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_SBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SBAIJ(Mat A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
  if (size == 1) {
    ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMPISBAIJSetPreallocation"
/*@C
   MatMPISBAIJSetPreallocation - For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on Mat

   Input Parameters:
+  A - the matrix
.  bs - size of block
.  d_nz - number of block nonzeros per block row in diagonal portion of local
          submatrix (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the upper triangular and diagonal part of the diagonal portion of the
           local submatrix (possibly different for each block row) or PETSC_NULL.
           You must leave room for the diagonal entry even if it is zero.
.  o_nz - number of block nonzeros per block row in the off-diagonal portion of local
          submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or PETSC_NULL.

   Options Database Keys:
.   -mat_no_unroll - uses code that does not unroll the loops in the
                     block calculations (much slower)
.   -mat_block_size - size of the blocks to use

   Notes:

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
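
   As an illustrative sketch (not an excerpt from a PETSc example), a typical
   preallocation sequence for block size 2, reserving room for roughly 3 blocks
   per block row in the diagonal portion and 1 in the off-diagonal portion
   (these counts are made-up values; derive them from your mesh or stencil):
.vb
   Mat A;
   MatCreate(comm,&A);
   MatSetSizes(A,m,n,M,N);     /* m,n,M,N chosen by the caller */
   MatSetType(A,MATMPISBAIJ);
   MatMPISBAIJSetPreallocation(A,2,3,PETSC_NULL,1,PETSC_NULL);
.ve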

   If the *_nnz parameter is given then the *_nz parameter is ignored.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   You can call MatGetInfo() to get information on how effective the preallocation was;
   for example the fields mallocs, nz_allocated, nz_used, nz_unneeded.  You can also run
   with the option -info and look for messages with the string malloc in them to see
   if additional memory allocation was needed.

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          -------------------
   row 3  | o o o d d d o o o o o o
   row 4  | o o o d d d o o o o o o
   row 5  | o o o d d d o o o o o o
          -------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d matrix is stored in
   MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.

   Now d_nz should indicate the number of block nonzeros per row in the upper triangular
   plus the diagonal part of the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.  For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

   Level: intermediate

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPISBAIJ"
/*@C
   MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
   (block compressed row).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs - size of block
. 
m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
       This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  d_nz - number of block nonzeros per block row in diagonal portion of local
          submatrix (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the upper triangular and diagonal part of the diagonal portion of the
           local submatrix (possibly different for each block row) or PETSC_NULL.
           You must leave room for the diagonal entry even if it is zero.
.  o_nz - number of block nonzeros per block row in the off-diagonal portion of local
          submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or PETSC_NULL.

   Output Parameter:
.  A - the matrix

   Options Database Keys:
.   -mat_no_unroll - uses code that does not unroll the loops in the
                     block calculations (much slower)
.   -mat_block_size - size of the blocks to use
.   -mat_mpi - use the parallel matrix data structures even on one processor
               (defaults to using SeqSBAIJ format on one processor)

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]

   Notes:
   The number of rows and columns must be divisible by blocksize.
   This matrix type does not support complex Hermitian operations.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   If the *_nnz parameter is given then the *_nz parameter is ignored.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).
2002 2003 .vb 2004 0 1 2 3 4 5 6 7 8 9 10 11 2005 ------------------- 2006 row 3 | o o o d d d o o o o o o 2007 row 4 | o o o d d d o o o o o o 2008 row 5 | o o o d d d o o o o o o 2009 ------------------- 2010 .ve 2011 2012 Thus, any entries in the d locations are stored in the d (diagonal) 2013 submatrix, and any entries in the o locations are stored in the 2014 o (off-diagonal) submatrix. Note that the d matrix is stored in 2015 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 2016 2017 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 2018 plus the diagonal part of the d matrix, 2019 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2020 In general, for PDE problems in which most nonzeros are near the diagonal, 2021 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2022 or you will get TERRIBLE performance; see the users' manual chapter on 2023 matrices. 2024 2025 Level: intermediate 2026 2027 .keywords: matrix, block, aij, compressed row, sparse, parallel 2028 2029 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2030 @*/ 2031 2032 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPISBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2033 { 2034 PetscErrorCode ierr; 2035 PetscMPIInt size; 2036 2037 PetscFunctionBegin; 2038 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2039 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2040 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2041 if (size > 1) { 2042 ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr); 2043 ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2044 } else { 2045 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 2046 ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2047 } 2048 PetscFunctionReturn(0); 2049 } 2050 2051 2052 #undef __FUNCT__ 2053 #define __FUNCT__ "MatDuplicate_MPISBAIJ" 2054 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2055 { 2056 Mat mat; 2057 Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data; 2058 PetscErrorCode ierr; 2059 PetscInt len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs; 2060 PetscScalar *array; 2061 2062 PetscFunctionBegin; 2063 *newmat = 0; 2064 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 2065 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2066 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2067 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2068 ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2069 ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2070 2071 mat->factortype = matin->factortype; 2072 mat->preallocated = PETSC_TRUE; 2073 mat->assembled = PETSC_TRUE; 2074 mat->insertmode = NOT_SET_VALUES; 2075 2076 a = (Mat_MPISBAIJ*)mat->data; 2077 a->bs2 = oldmat->bs2; 2078 a->mbs = oldmat->mbs; 2079 a->nbs = oldmat->nbs; 2080 a->Mbs = oldmat->Mbs; 2081 a->Nbs = oldmat->Nbs; 2082 2083 2084 a->size = oldmat->size; 2085 a->rank = oldmat->rank; 2086 a->donotstash = oldmat->donotstash; 2087 a->roworiented = oldmat->roworiented; 2088 a->rowindices = 0; 2089 a->rowvalues = 0; 2090 a->getrowactive = PETSC_FALSE; 2091 a->barray = 0; 2092 a->rstartbs = oldmat->rstartbs; 2093 a->rendbs = oldmat->rendbs; 2094 a->cstartbs = oldmat->cstartbs; 2095 
a->cendbs = oldmat->cendbs; 2096 2097 /* hash table stuff */ 2098 a->ht = 0; 2099 a->hd = 0; 2100 a->ht_size = 0; 2101 a->ht_flag = oldmat->ht_flag; 2102 a->ht_fact = oldmat->ht_fact; 2103 a->ht_total_ct = 0; 2104 a->ht_insert_ct = 0; 2105 2106 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr); 2107 if (oldmat->colmap) { 2108 #if defined (PETSC_USE_CTABLE) 2109 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2110 #else 2111 ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2112 ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2113 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2114 #endif 2115 } else a->colmap = 0; 2116 2117 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2118 ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2119 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2120 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 2121 } else a->garray = 0; 2122 2123 ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 2124 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2125 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2126 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2127 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2128 2129 ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr); 2130 ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr); 2131 ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr); 2132 ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr); 2133 2134 ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr); 2135 ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr); 2136 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr); 2137 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr); 2138 ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr); 2139 ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr); 2140 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr); 2141 ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr); 2142 ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr); 2143 ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr); 2144 ierr = PetscLogObjectParent(mat,a->slvec0b);CHKERRQ(ierr); 2145 ierr = PetscLogObjectParent(mat,a->slvec1a);CHKERRQ(ierr); 2146 ierr = PetscLogObjectParent(mat,a->slvec1b);CHKERRQ(ierr); 2147 2148 /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */ 2149 ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr); 2150 a->sMvctx = oldmat->sMvctx; 2151 ierr = PetscLogObjectParent(mat,a->sMvctx);CHKERRQ(ierr); 2152 2153 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2154 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2155 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2156 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2157 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2158 *newmat = mat; 2159 PetscFunctionReturn(0); 2160 } 2161 2162 #undef __FUNCT__ 2163 #define __FUNCT__ "MatLoad_MPISBAIJ" 2164 PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer) 2165 { 2166 PetscErrorCode ierr; 2167 PetscInt i,nz,j,rstart,rend; 
2168 PetscScalar *vals,*buf; 2169 MPI_Comm comm = ((PetscObject)viewer)->comm; 2170 MPI_Status status; 2171 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,*locrowlens,mmbs; 2172 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2173 PetscInt *procsnz = 0,jj,*mycols,*ibuf; 2174 PetscInt bs=1,Mbs,mbs,extra_rows; 2175 PetscInt *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2176 PetscInt dcount,kmax,k,nzcount,tmp,sizesset=1,grows,gcols; 2177 int fd; 2178 2179 PetscFunctionBegin; 2180 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr); 2181 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 2182 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2183 2184 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2185 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2186 if (!rank) { 2187 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2188 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2189 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2190 if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ"); 2191 } 2192 2193 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0; 2194 2195 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2196 M = header[1]; N = header[2]; 2197 2198 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 2199 if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M; 2200 if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N; 2201 2202 /* If global sizes are set, check if they are consistent with that given in the file */ 2203 if (sizesset) { 2204 ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr); 2205 } 2206 if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 2207 if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 2208 2209 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 2210 2211 /* 2212 This code adds extra rows to make sure the number of rows is 2213 divisible by the blocksize 2214 */ 2215 Mbs = M/bs; 2216 extra_rows = bs - M + bs*(Mbs); 2217 if (extra_rows == bs) extra_rows = 0; 2218 else Mbs++; 2219 if (extra_rows &&!rank) { 2220 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 2221 } 2222 2223 /* determine ownership of all rows */ 2224 if (newmat->rmap->n < 0) { /* PETSC_DECIDE */ 2225 mbs = Mbs/size + ((Mbs % size) > rank); 2226 m = mbs*bs; 2227 } else { /* User Set */ 2228 m = newmat->rmap->n; 2229 mbs = m/bs; 2230 } 2231 ierr = PetscMalloc2(size+1,PetscMPIInt,&rowners,size+1,PetscMPIInt,&browners);CHKERRQ(ierr); 2232 mmbs = PetscMPIIntCast(mbs); 2233 ierr = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2234 rowners[0] = 0; 2235 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2236 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2237 rstart = rowners[rank]; 2238 rend = rowners[rank+1]; 2239 2240 /* distribute row lengths to all processors */ 2241 ierr = 
PetscMalloc((rend-rstart)*bs*sizeof(PetscMPIInt),&locrowlens);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
    ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {   /* procs[0] */
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]*bs; j<rowners[i+1]*bs; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz     = procsnz[0];
    ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
    mycols = ibuf;
    if (size == 1) nz -= extra_rows;
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
    if (size == 1) for (i=0; i<extra_rows; i++) { mycols[nz+i] = M+i; }

    /* read in the column indices for all the other processes (except the last) and ship them off */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    /* read in the stuff for the last proc */
    if (size != 1) {
      nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
      ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {  /* procs[i], i>0 */
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += locrowlens[i];
    }
    ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
    mycols = ibuf;
    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* loop over local rows, determining number of off-diagonal entries */
  ierr = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
  ierr = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
  ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
  rowcount = 0;
  nzcount  = 0;
  for (i=0; i<mbs; i++) {
    dcount  = 0;
    odcount = 0;
    for (j=0; j<bs; j++)
{
      kmax = locrowlens[rowcount];
      for (k=0; k<kmax; k++) {
        tmp = mycols[nzcount++]/bs;   /* block col. index */
        if (!mask[tmp]) {
          mask[tmp] = 1;
          if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;  /* entry in off-diag portion */
          else masked1[dcount++] = tmp;  /* entry in diag portion */
        }
      }
      rowcount++;
    }

    dlens[i]  = dcount;   /* d_nzz[i] */
    odlens[i] = odcount;  /* o_nzz[i] */

    /* zero out the mask elements we set */
    for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
    for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
  }
  if (!sizesset) {
    ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
  }
  ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
  ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);

  if (!rank) {
    ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
    /* read in my part of the matrix numerical values */
    nz     = procsnz[0];
    vals   = buf;
    mycols = ibuf;
    if (size == 1) nz -= extra_rows;
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
    if (size == 1) for (i=0; i<extra_rows; i++) { vals[nz+i] = 1.0; }

    /* insert into matrix */
    jj = rstart*bs;
    for (i=0; i<m; i++) {
      ierr    = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }

    /* read in the values for the other processes (except the last) and ship them out */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
    }
    /* the last proc */
    if (size != 1) {
      nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
      ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);

    /* receive message of values */
    vals   = buf;
    mycols = ibuf;
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj = rstart*bs;
    for (i=0; i<m; i++) {
      ierr    = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
  }

  ierr = PetscFree(locrowlens);CHKERRQ(ierr);
  ierr = PetscFree(buf);CHKERRQ(ierr);
  ierr = PetscFree(ibuf);CHKERRQ(ierr);
  ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
  ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
  ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
/*XXXXX@
MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2417 2418 Input Parameters: 2419 . mat - the matrix 2420 . fact - factor 2421 2422 Not Collective on Mat, each process can have a different hash factor 2423 2424 Level: advanced 2425 2426 Notes: 2427 This can also be set by the command line option: -mat_use_hash_table fact 2428 2429 .keywords: matrix, hashtable, factor, HT 2430 2431 .seealso: MatSetOption() 2432 @XXXXX*/ 2433 2434 2435 #undef __FUNCT__ 2436 #define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ" 2437 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[]) 2438 { 2439 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 2440 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(a->B)->data; 2441 PetscReal atmp; 2442 PetscReal *work,*svalues,*rvalues; 2443 PetscErrorCode ierr; 2444 PetscInt i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol; 2445 PetscMPIInt rank,size; 2446 PetscInt *rowners_bs,dest,count,source; 2447 PetscScalar *va; 2448 MatScalar *ba; 2449 MPI_Status stat; 2450 2451 PetscFunctionBegin; 2452 if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov"); 2453 ierr = MatGetRowMaxAbs(a->A,v,PETSC_NULL);CHKERRQ(ierr); 2454 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2455 2456 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 2457 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 2458 2459 bs = A->rmap->bs; 2460 mbs = a->mbs; 2461 Mbs = a->Mbs; 2462 ba = b->a; 2463 bi = b->i; 2464 bj = b->j; 2465 2466 /* find ownerships */ 2467 rowners_bs = A->rmap->range; 2468 2469 /* each proc creates an array to be distributed */ 2470 ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr); 2471 ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr); 2472 2473 /* row_max for B */ 2474 if (rank != size-1){ 2475 for (i=0; i<mbs; i++) { 2476 ncols = bi[1] - bi[0]; bi++; 2477 brow = bs*i; 2478 for (j=0; j<ncols; j++){ 2479 bcol = bs*(*bj); 2480 for (kcol=0; kcol<bs; kcol++){ 2481 col = bcol + kcol; /* local col index */ 2482 col += rowners_bs[rank+1]; /* global col index */ 2483 for (krow=0; krow<bs; krow++){ 2484 atmp = PetscAbsScalar(*ba); ba++; 2485 row = brow + krow; /* local row index */ 2486 if (PetscRealPart(va[row]) < atmp) va[row] = atmp; 2487 if (work[col] < atmp) work[col] = atmp; 2488 } 2489 } 2490 bj++; 2491 } 2492 } 2493 2494 /* send values to its owners */ 2495 for (dest=rank+1; dest<size; dest++){ 2496 svalues = work + rowners_bs[dest]; 2497 count = rowners_bs[dest+1]-rowners_bs[dest]; 2498 ierr = MPI_Send(svalues,count,MPIU_REAL,dest,rank,((PetscObject)A)->comm);CHKERRQ(ierr); 2499 } 2500 } 2501 2502 /* receive values */ 2503 if (rank){ 2504 rvalues = work; 2505 count = rowners_bs[rank+1]-rowners_bs[rank]; 2506 for (source=0; source<rank; source++){ 2507 ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,((PetscObject)A)->comm,&stat);CHKERRQ(ierr); 2508 /* process values */ 2509 for (i=0; i<count; i++){ 2510 if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i]; 2511 } 2512 } 2513 } 2514 2515 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2516 ierr = PetscFree(work);CHKERRQ(ierr); 2517 PetscFunctionReturn(0); 2518 } 2519 2520 #undef __FUNCT__ 2521 #define __FUNCT__ "MatSOR_MPISBAIJ" 2522 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2523 { 2524 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2525 PetscErrorCode ierr; 2526 PetscInt mbs=mat->mbs,bs=matin->rmap->bs; 2527 
PetscScalar *x,*ptr,*from; 2528 Vec bb1; 2529 const PetscScalar *b; 2530 2531 PetscFunctionBegin; 2532 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 2533 if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2534 2535 if (flag == SOR_APPLY_UPPER) { 2536 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2537 PetscFunctionReturn(0); 2538 } 2539 2540 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2541 if ( flag & SOR_ZERO_INITIAL_GUESS ) { 2542 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2543 its--; 2544 } 2545 2546 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2547 while (its--){ 2548 2549 /* lower triangular part: slvec0b = - B^T*xx */ 2550 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2551 2552 /* copy xx into slvec0a */ 2553 ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2554 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2555 ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2556 ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2557 2558 ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr); 2559 2560 /* copy bb into slvec1a */ 2561 ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2562 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 2563 ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2564 ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2565 2566 /* set slvec1b = 0 */ 2567 ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr); 2568 2569 ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2570 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2571 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 2572 ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2573 2574 /* upper triangular part: bb1 = bb1 - B*x */ 2575 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr); 2576 2577 /* local diagonal sweep */ 2578 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2579 } 2580 ierr = VecDestroy(bb1);CHKERRQ(ierr); 2581 } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)){ 2582 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2583 } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)){ 2584 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2585 } else if (flag & SOR_EISENSTAT) { 2586 Vec xx1; 2587 PetscTruth hasop; 2588 const PetscScalar *diag; 2589 PetscScalar *sl,scale = (omega - 2.0)/omega; 2590 PetscInt i,n; 2591 2592 if (!mat->xx1) { 2593 ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr); 2594 ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr); 2595 } 2596 xx1 = mat->xx1; 2597 bb1 = mat->bb1; 2598 2599 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 2600 2601 if (!mat->diag) { 2602 /* this is wrong for same matrix with new nonzero values */ 2603 ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr); 2604 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 2605 } 2606 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 2607 2608 if (hasop) { 2609 ierr = 
MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 2610 ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr); 2611 } else { 2612 /* 2613 These two lines are replaced by code that may be a bit faster for a good compiler 2614 ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr); 2615 ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr); 2616 */ 2617 ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr); 2618 ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr); 2619 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 2620 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2621 ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr); 2622 if (omega == 1.0) { 2623 for (i=0; i<n; i++) { 2624 sl[i] = b[i] - diag[i]*x[i]; 2625 } 2626 ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr); 2627 } else { 2628 for (i=0; i<n; i++) { 2629 sl[i] = b[i] + scale*diag[i]*x[i]; 2630 } 2631 ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr); 2632 } 2633 ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr); 2634 ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr); 2635 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 2636 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2637 } 2638 2639 /* multiply off-diagonal portion of matrix */ 2640 ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr); 2641 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2642 ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr); 2643 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2644 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2645 ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr); 2646 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2647 ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2648 ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2649 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr); 2650 2651 /* local sweep */ 2652 ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 2653 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 2654 } else { 2655 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2656 } 2657 PetscFunctionReturn(0); 2658 } 2659 2660 #undef __FUNCT__ 2661 #define __FUNCT__ "MatSOR_MPISBAIJ_2comm" 2662 PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2663 { 2664 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2665 PetscErrorCode ierr; 2666 Vec lvec1,bb1; 2667 2668 PetscFunctionBegin; 2669 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 2670 if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2671 2672 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2673 if ( flag & SOR_ZERO_INITIAL_GUESS ) { 2674 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2675 its--; 2676 } 2677 2678 ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr); 2679 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2680 while (its--){ 2681 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2682 2683 /* lower diagonal part: bb1 = bb - B^T*xx */ 2684 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr); 2685 ierr = 
VecScale(lvec1,-1.0);CHKERRQ(ierr); 2686 2687 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2688 ierr = VecCopy(bb,bb1);CHKERRQ(ierr); 2689 ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2690 2691 /* upper diagonal part: bb1 = bb1 - B*x */ 2692 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2693 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 2694 2695 ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2696 2697 /* diagonal sweep */ 2698 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2699 } 2700 ierr = VecDestroy(lvec1);CHKERRQ(ierr); 2701 ierr = VecDestroy(bb1);CHKERRQ(ierr); 2702 } else { 2703 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2704 } 2705 PetscFunctionReturn(0); 2706 } 2707 2708
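
/*
   Illustrative usage sketch (not part of the original source): how the
   MPISBAIJ kernels in this file are typically reached from user code.
   Only the upper triangular blocks are set by the caller; with PCSOR the
   MatSOR_MPISBAIJ routine above is invoked during the solve.  Error
   checking is omitted and the sizes are made-up values.

     Mat A; Vec x,b; KSP ksp; PC pc;
     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);
     MatSetType(A,MATMPISBAIJ);
     MatMPISBAIJSetPreallocation(A,1,3,PETSC_NULL,1,PETSC_NULL);
     ... MatSetValues(A,...) with column index >= row index only ...
     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
     KSPCreate(PETSC_COMM_WORLD,&ksp);
     KSPSetOperators(ksp,A,A,DIFFERENT_NONZERO_PATTERN);
     KSPGetPC(ksp,&pc);
     PCSetType(pc,PCSOR);      (reaches MatSOR_MPISBAIJ)
     KSPSolve(ksp,b,x);
*/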