#define PETSCMAT_DLL

#include "src/mat/impls/baij/mpi/mpibaij.h"   /*I  "petscmat.h"  I*/
#include "mpisbaij.h"
#include "src/mat/impls/sbaij/seq/sbaij.h"

EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat);
EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat);
EXTERN PetscErrorCode DisAssemble_MPISBAIJ(Mat);
EXTERN PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt);
EXTERN PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar[]);
EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar[]);
EXTERN PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
EXTERN PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
EXTERN PetscErrorCode MatPrintHelp_SeqSBAIJ(Mat);
EXTERN PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*);
EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar*);
EXTERN PetscErrorCode MatGetRowMax_MPISBAIJ(Mat,Vec);
EXTERN PetscErrorCode MatRelax_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec);

/*  UGLY, ugly, ugly
   When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does
   not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and
   inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ()
   converts the entries into single precision and then calls ..._MatScalar() to put them
   into the single precision data structures.
*/
#if defined(PETSC_USE_MAT_SINGLE)
EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
EXTERN PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
EXTERN PetscErrorCode MatSetValues_MPISBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
#else
#define MatSetValuesBlocked_SeqSBAIJ_MatScalar      MatSetValuesBlocked_SeqSBAIJ
#define MatSetValues_MPISBAIJ_MatScalar             MatSetValues_MPISBAIJ
#define MatSetValuesBlocked_MPISBAIJ_MatScalar      MatSetValuesBlocked_MPISBAIJ
#define MatSetValues_MPISBAIJ_HT_MatScalar          MatSetValues_MPISBAIJ_HT
#define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar   MatSetValuesBlocked_MPISBAIJ_HT
#endif

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPISBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#define CHUNKSIZE 10

#define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
{ \
  brow = row/bs;  \
  rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
  rmax = aimax[brow]; nrow = ailen[brow]; \
  bcol = col/bs; \
  ridx = row % bs; cidx = col % bs; \
  low  = 0; high = nrow; \
  while (high-low > 3) { \
    t = (low+high)/2; \
    if (rp[t] > bcol) high = t; \
    else              low  = t; \
  } \
  for (_i=low; _i<high; _i++) { \
    if (rp[_i] > bcol) break; \
    if (rp[_i] == bcol) { \
      bap = ap + bs2*_i + bs*cidx + ridx; \
      if (addv == ADD_VALUES) *bap += value; \
      else                    *bap  = value; \
      goto a_noinsert; \
    } \
  } \
  if (a->nonew == 1)  goto a_noinsert; \
  if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
  MatSeqXAIJReallocateAIJ(a,bs2,nrow,brow,bcol,rmax,aa,ai,aj,a->mbs,rp,ap,aimax,a->nonew); \
  N = nrow++ - 1;  \
  /* shift up all the later entries in this row */ \
  for (ii=N; ii>=_i; ii--) { \
    rp[ii+1] = rp[ii]; \
    ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
  } \
  if (N >= _i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \
  rp[_i]                      = bcol;  \
  ap[bs2*_i + bs*cidx + ridx] = value;  \
  a_noinsert:; \
  ailen[brow] = nrow; \
}
#ifndef MatSetValues_SeqBAIJ_B_Private
#define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
{ \
  brow = row/bs;  \
  rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
  rmax = bimax[brow]; nrow = bilen[brow]; \
  bcol = col/bs; \
  ridx = row % bs; cidx = col % bs; \
  low  = 0; high = nrow; \
  while (high-low > 3) { \
    t = (low+high)/2; \
    if (rp[t] > bcol) high = t; \
    else              low  = t; \
  } \
  for (_i=low; _i<high; _i++) { \
    if (rp[_i] > bcol) break; \
    if (rp[_i] == bcol) { \
      bap = ap + bs2*_i + bs*cidx + ridx; \
      if (addv == ADD_VALUES) *bap += value; \
      else                    *bap  = value; \
      goto b_noinsert; \
    } \
  } \
  if (b->nonew == 1)  goto b_noinsert; \
  if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
  MatSeqXAIJReallocateAIJ(b,bs2,nrow,brow,bcol,rmax,ba,bi,bj,b->mbs,rp,ap,bimax,b->nonew); \
  N = nrow++ - 1;  \
  /* shift up all the later entries in this row */ \
  for (ii=N; ii>=_i; ii--) { \
    rp[ii+1] = rp[ii]; \
    ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
  } \
  if (N >= _i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \
  rp[_i]                      = bcol;  \
  ap[bs2*_i + bs*cidx + ridx] = value;  \
  b_noinsert:; \
  bilen[brow] = nrow; \
}
#endif
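/*
   Added note: the two MatSetValues_SeqSBAIJ_{A,B}_Private() macros above are not
   self-contained; they rely on the caller (MatSetValues_MPISBAIJ_MatScalar() below)
   to declare and initialize the working variables they reference, e.g. bs, bs2, brow,
   bcol, ridx, cidx, rp, ap, bap, rmax, nrow, low, high, t, _i, ii, N and ierr, plus
   the Mat_SeqSBAIJ fields a, aa, ai, aj, aimax, ailen (and the Mat_SeqBAIJ fields
   b, ba, bi, bj, bimax, bilen for the B macro).
*/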
#if defined(PETSC_USE_MAT_SINGLE)
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPISBAIJ"
PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,N = m*n;
  MatScalar      *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen = N;
  }
  vsingle = b->setvaluescopy;

  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,N = m*n*b->bs2;
  MatScalar      *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen = N;
  }
  vsingle = b->setvaluescopy;
  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif

/* Only add/insert a(i,j) with i<=j (blocks).
   Any a(i,j) with i>j input by the user is ignored.
*/
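/*
   Illustration (a sketch, not part of the library; indices and values are hypothetical):
   with bs = 2,
.vb
     i = 5; j = 2; v = 1.0;
     MatSetValues(mat,1,&i,1,&j,&v,ADD_VALUES);   - block row 2 > block column 1, silently dropped
     MatSetValues(mat,1,&j,1,&i,&v,ADD_VALUES);   - block row 1 <= block column 2, stored (or stashed)
.ve
*/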
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPISBAIJ_MatScalar"
PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  MatScalar      value;
  PetscTruth     roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=baij->rstart_bs;
  PetscInt       rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
  PetscInt       cend_orig=baij->cend_bs,bs=mat->bs;

  /* Some Variables required in the macro */
  Mat          A = baij->A;
  Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data;
  PetscInt     *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar    *aa=a->a;

  Mat         B = baij->B;
  Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data;
  PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba=b->a;

  PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar *ap,*bap;

  /* for stash */
  PetscInt  n_loc, *in_loc = 0;
  MatScalar *v_loc = 0;

  PetscFunctionBegin;

  if (!baij->donotstash) {
    ierr = PetscMalloc(n*sizeof(PetscInt),&in_loc);CHKERRQ(ierr);
    ierr = PetscMalloc(n*sizeof(MatScalar),&v_loc);CHKERRQ(ierr);
  }

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
      row = im[i] - rstart_orig;                     /* local row index */
      for (j=0; j<n; j++) {
        if (im[i]/bs > in[j]/bs) continue;           /* ignore lower triangular blocks */
        if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */
          col  = in[j] - cstart_orig;                /* local col index */
          brow = row/bs; bcol = col/bs;
          if (brow > bcol) continue;                 /* ignore lower triangular blocks of A */
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);}
#endif
        else { /* off-diag entry (B) */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B     = baij->B;
              b     = (Mat_SeqBAIJ*)(B)->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              ba    = b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else { /* off processor entry */
      if (!baij->donotstash) {
        n_loc = 0;
        for (j=0; j<n; j++) {
          if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
          in_loc[n_loc] = in[j];
          if (roworiented) {
            v_loc[n_loc] = v[i*n+j];
          } else {
            v_loc[n_loc] = v[j*m+i];
          }
          n_loc++;
        }
        ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr);
      }
    }
  }

  if (!baij->donotstash) {
    ierr = PetscFree(in_loc);CHKERRQ(ierr);
    ierr = PetscFree(v_loc);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_MatScalar"
PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray = baij->barray;
  PetscTruth      roworiented = baij->roworiented;
  PetscErrorCode  ierr;
  PetscInt        i,j,ii,jj,row,col,rstart=baij->rstart;
  PetscInt        rend=baij->rend,cstart=baij->cstart,stepval;
  PetscInt        cend=baij->cend,bs=mat->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++ = *value++;
            }
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined (PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPISBAIJ"
PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs=mat->bs,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
  PetscInt       bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
    if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]);
        if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          col  = idxn[j] - bscstart;
          ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPISBAIJ"
PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscReal      sum[2],*lnorm2;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
      ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;   /* square of norm(A) */
      ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;   /* square of norm(B) */
      ierr  = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = sqrt(sum[0] + 2*sum[1]);
      ierr  = PetscFree(lnorm2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
      Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
      Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
      PetscReal    *rsum,*rsum2,vabs;
      PetscInt     *jj,*garray=baij->garray,rstart=baij->rstart,nz;
      PetscInt     brow,bcol,col,bs=baij->A->bs,row,grow,gcol,mbs=amat->mbs;
      MatScalar    *v;

      ierr  = PetscMalloc((2*mat->N+1)*sizeof(PetscReal),&rsum);CHKERRQ(ierr);
      rsum2 = rsum + mat->N;
      ierr  = PetscMemzero(rsum,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
      /* Amat */
      v = amat->a; jj = amat->j;
      for (brow=0; brow<mbs; brow++) {
        grow = bs*(rstart + brow);
        nz   = amat->i[brow+1] - amat->i[brow];
        for (bcol=0; bcol<nz; bcol++) {
          gcol = bs*(rstart + *jj); jj++;
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              vabs            = PetscAbsScalar(*v); v++;
              rsum[gcol+col] += vabs;
              /* non-diagonal block */
              if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
            }
          }
        }
      }
      /* Bmat */
      v = bmat->a; jj = bmat->j;
      for (brow=0; brow<mbs; brow++) {
        grow = bs*(rstart + brow);
        nz   = bmat->i[brow+1] - bmat->i[brow];
        for (bcol=0; bcol<nz; bcol++) {
          gcol = bs*garray[*jj]; jj++;
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              vabs            = PetscAbsScalar(*v); v++;
              rsum[gcol+col] += vabs;
              rsum[grow+row] += vabs;
            }
          }
        }
      }
      ierr  = MPI_Allreduce(rsum,rsum2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = 0.0;
      for (col=0; col<mat->N; col++) {
        if (rsum2[col] > *norm) *norm = rsum2[col];
      }
      ierr = PetscFree(rsum);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (baij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscLogInfo((0,"MatAssemblyBegin_MPISBAIJ:Stash has %D entries,uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscLogInfo((0,"MatAssemblyBegin_MPISBAIJ:Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)baij->A->data;
  Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col,other_disassembled;
  PetscMPIInt    n;
  PetscTruth     r1,r2,r3;
  MatScalar      *val;
  InsertMode     addv = mat->insertmode;

  PetscFunctionBegin;

  if (!baij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column-oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = b->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    b->roworiented    = PETSC_FALSE;
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    b->roworiented    = r3;
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
  }
  b->compressedrow.use = PETSC_TRUE;
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

  if (baij->rowvalues) {
    ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
    baij->rowvalues = 0;
  }

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscInt          bs = mat->bs;
  PetscMPIInt       size = baij->size,rank = baij->rank;
  PetscTruth        iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
                                                rank,mat->m,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
                                                mat->bs,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    Mat_SeqSBAIJ *Aloc;
    Mat_SeqBAIJ  *Bloc;
    PetscInt     M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar    *a;

    /* Should this be the same type as mat? */
    if (!rank) {
      ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(A,mat->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqSBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstart + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstart+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Bloc = (Mat_SeqBAIJ*)baij->B->data;
    ai   = Bloc->i; aj = Bloc->j; a = Bloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstart + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPISBAIJ"
PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (iascii || isdraw || issocket || isbinary) {
    ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPISBAIJ"
PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(baij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
#else
  if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
#endif
  if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
  if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
  if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
  if (baij->slvec0) {
    ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr);
    ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr);
  }
  if (baij->slvec1) {
    ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr);
    ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr);
    ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr);
  }
  if (baij->sMvctx) {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);}
  if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
  if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
  if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
#if defined(PETSC_USE_MAT_SINGLE)
  if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
#endif
  ierr = PetscFree(baij);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ"
PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt,mbs=a->mbs,bs=A->bs;
  PetscScalar    *x,*from,zero=0.0;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  }
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->m) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  }

  /* diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);

  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ_2comm"
PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  }
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->m) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  }

  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  /* do diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  /* do superdiagonal part */
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  /* do subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPISBAIJ"
PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       mbs=a->mbs,bs=A->bs;
  PetscScalar    *x,*from,zero=0.0;

  PetscFunctionBegin;
  /*
  PetscSynchronizedPrintf(A->comm," MatMultAdd is called ...\n");
  PetscSynchronizedFlush(A->comm);
  */
  /* diagonal part */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);

  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
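/*
   Added summary of the single-scatter product used by MatMult_MPISBAIJ() above
   (slvec0, slvec0b, slvec1, slvec1a, slvec1b and sMvctx are the Mat_MPISBAIJ work
   vectors and scatter; this is a reading of the code, not additional API):

.vb
     slvec1a = A*x                   - local upper-triangular + diagonal blocks
     slvec1b = 0
     slvec0  = [ x ; B^T*x ]         - local x plus contributions owed to other processes
     scatter-add  slvec0 -> slvec1   - a single forward scatter on sMvctx
     y       = B*slvec1b + slvec1a   - off-diagonal blocks applied to the gathered x
.ve

   MatMultAdd_MPISBAIJ() follows the same pattern with yy folded into slvec1a.
   The *_2comm variants compute the same product with two scatters on Mvctx instead.
*/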
"MatMultAdd_MPISBAIJ_2comm" 961 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz) 962 { 963 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 964 PetscErrorCode ierr; 965 966 PetscFunctionBegin; 967 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 968 /* do diagonal part */ 969 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 970 /* do supperdiagonal part */ 971 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 972 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 973 974 /* do subdiagonal part */ 975 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 976 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 977 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 978 979 PetscFunctionReturn(0); 980 } 981 982 #undef __FUNCT__ 983 #define __FUNCT__ "MatMultTranspose_MPISBAIJ" 984 PetscErrorCode MatMultTranspose_MPISBAIJ(Mat A,Vec xx,Vec yy) 985 { 986 PetscErrorCode ierr; 987 988 PetscFunctionBegin; 989 ierr = MatMult(A,xx,yy);CHKERRQ(ierr); 990 PetscFunctionReturn(0); 991 } 992 993 #undef __FUNCT__ 994 #define __FUNCT__ "MatMultTransposeAdd_MPISBAIJ" 995 PetscErrorCode MatMultTransposeAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 996 { 997 PetscErrorCode ierr; 998 999 PetscFunctionBegin; 1000 ierr = MatMultAdd(A,xx,yy,zz);CHKERRQ(ierr); 1001 PetscFunctionReturn(0); 1002 } 1003 1004 /* 1005 This only works correctly for square matrices where the subblock A->A is the 1006 diagonal block 1007 */ 1008 #undef __FUNCT__ 1009 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ" 1010 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v) 1011 { 1012 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1013 PetscErrorCode ierr; 1014 1015 PetscFunctionBegin; 1016 /* if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */ 1017 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1018 PetscFunctionReturn(0); 1019 } 1020 1021 #undef __FUNCT__ 1022 #define __FUNCT__ "MatScale_MPISBAIJ" 1023 PetscErrorCode MatScale_MPISBAIJ(const PetscScalar *aa,Mat A) 1024 { 1025 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1026 PetscErrorCode ierr; 1027 1028 PetscFunctionBegin; 1029 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 1030 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 1031 PetscFunctionReturn(0); 1032 } 1033 1034 #undef __FUNCT__ 1035 #define __FUNCT__ "MatGetRow_MPISBAIJ" 1036 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1037 { 1038 PetscFunctionBegin; 1039 SETERRQ(PETSC_ERR_SUP,"MatGetRow is not supported for SBAIJ matrix format"); 1040 PetscFunctionReturn(0); 1041 } 1042 1043 #undef __FUNCT__ 1044 #define __FUNCT__ "MatRestoreRow_MPISBAIJ" 1045 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1046 { 1047 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1048 1049 PetscFunctionBegin; 1050 if (!baij->getrowactive) { 1051 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1052 } 1053 baij->getrowactive = PETSC_FALSE; 1054 PetscFunctionReturn(0); 1055 } 1056 1057 #undef __FUNCT__ 1058 #define __FUNCT__ "MatZeroEntries_MPISBAIJ" 1059 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A) 1060 { 1061 Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data; 1062 PetscErrorCode ierr; 1063 1064 PetscFunctionBegin; 1065 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1066 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1067 
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPISBAIJ"
PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
  Mat            A = a->A,B = a->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = (PetscReal)matin->bs;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else {
    SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
  }
  info->rows_global       = (PetscReal)A->M;
  info->columns_global    = (PetscReal)A->N;
  info->rows_local        = (PetscReal)A->m;
  info->columns_local     = (PetscReal)A->N;
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPISBAIJ"
PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NO_NEW_NONZERO_LOCATIONS:
  case MAT_YES_NEW_NONZERO_LOCATIONS:
  case MAT_COLUMNS_UNSORTED:
  case MAT_COLUMNS_SORTED:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_KEEP_ZEROED_ROWS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROWS_SORTED:
  case MAT_ROWS_UNSORTED:
  case MAT_YES_NEW_DIAGONALS:
    ierr = PetscLogInfo((A,"Info:MatSetOption_MPISBAIJ:Option ignored\n"));CHKERRQ(ierr);
    break;
  case MAT_COLUMN_ORIENTED:
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_NO_NEW_DIAGONALS:
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
  case MAT_USE_HASH_TABLE:
    a->ht_flag = PETSC_TRUE;
    break;
  case MAT_NOT_SYMMETRIC:
  case MAT_NOT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
    SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_NOT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_NOT_SYMMETRY_ETERNAL:
    break;
  default:
    SETERRQ(PETSC_ERR_SUP,"unknown option");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPISBAIJ"
PetscErrorCode MatTranspose_MPISBAIJ(Mat A,Mat *B)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  Mat            a = baij->A, b = baij->B;
  PetscErrorCode ierr;
  PetscInt       nv,m,n;
  PetscTruth     flg;

  PetscFunctionBegin;
  if (ll != rr) {
    ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
    if (!flg)
      SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be the same\n");
  }
  if (!ll) PetscFunctionReturn(0);

  ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
  if (m != n) SETERRQ2(PETSC_ERR_ARG_SIZ,"For symmetric format, local sizes %d %d must be the same",m,n);

  ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
  if (nv != n) SETERRQ(PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");

  ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);

  /* left diagonalscale the off-diagonal part */
  ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);

  /* scale the diagonal part */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  /* right diagonalscale the off-diagonal part */
  ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
  ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPrintHelp_MPISBAIJ"
PetscErrorCode MatPrintHelp_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
  MPI_Comm          comm = A->comm;
  static PetscTruth called = PETSC_FALSE;
  PetscErrorCode    ierr;

  PetscFunctionBegin;
  if (!a->rank) {
    ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr);
  }
  if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
  ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
  ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*);

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPISBAIJ"
PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag)
{
  Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
  Mat            a,b,c,d;
  PetscTruth     flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ"
PetscErrorCode MatSetUpPreallocation_MPISBAIJ(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPISBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
{
  PetscErrorCode ierr;
  PetscInt       i;
  PetscTruth     flg;

  PetscFunctionBegin;
  for (i=0; i<n; i++) {
    ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
    if (!flg) {
      SETERRQ(PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices");
    }
  }
  ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = {
       MatSetValues_MPISBAIJ,
       MatGetRow_MPISBAIJ,
       MatRestoreRow_MPISBAIJ,
       MatMult_MPISBAIJ,
/* 4*/ MatMultAdd_MPISBAIJ,
       MatMultTranspose_MPISBAIJ,
       MatMultTransposeAdd_MPISBAIJ,
       0,
       0,
       0,
/*10*/ 0,
       0,
       0,
       MatRelax_MPISBAIJ,
       MatTranspose_MPISBAIJ,
/*15*/ MatGetInfo_MPISBAIJ,
       MatEqual_MPISBAIJ,
       MatGetDiagonal_MPISBAIJ,
       MatDiagonalScale_MPISBAIJ,
       MatNorm_MPISBAIJ,
/*20*/ MatAssemblyBegin_MPISBAIJ,
       MatAssemblyEnd_MPISBAIJ,
       0,
       MatSetOption_MPISBAIJ,
       MatZeroEntries_MPISBAIJ,
/*25*/ 0,
       0,
       0,
       0,
       0,
/*30*/ MatSetUpPreallocation_MPISBAIJ,
       0,
       0,
       0,
       0,
/*35*/ MatDuplicate_MPISBAIJ,
       0,
       0,
       0,
       0,
/*40*/ 0,
       MatGetSubMatrices_MPISBAIJ,
       MatIncreaseOverlap_MPISBAIJ,
       MatGetValues_MPISBAIJ,
       0,
/*45*/ MatPrintHelp_MPISBAIJ,
       MatScale_MPISBAIJ,
       0,
       0,
       0,
/*50*/ 0,
       0,
       0,
       0,
       0,
/*55*/ 0,
       0,
       MatSetUnfactored_MPISBAIJ,
       0,
       MatSetValuesBlocked_MPISBAIJ,
/*60*/ 0,
       0,
       0,
       MatGetPetscMaps_Petsc,
       0,
/*65*/ 0,
       0,
       0,
       0,
       0,
/*70*/ MatGetRowMax_MPISBAIJ,
       0,
       0,
       0,
       0,
/*75*/ 0,
       0,
       0,
       0,
       0,
/*80*/ 0,
       0,
       0,
       0,
       MatLoad_MPISBAIJ,
/*85*/ 0,
       0,
       0,
       0,
       0,
/*90*/ 0,
       0,
       0,
       0,
       0,
/*95*/ 0,
       0,
       0,
       0};

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
{
  PetscFunctionBegin;
  *a      = ((Mat_MPISBAIJ*)A->data)->A;
  *iscopy = PETSC_FALSE;
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
{
  Mat_MPISBAIJ   *b;
  PetscErrorCode ierr;
  PetscInt       i,mbs,Mbs;

  PetscFunctionBegin;
  ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);

  if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
  if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
  if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
  if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
  if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
  if (d_nnz) {
    for (i=0; i<B->m/bs; i++) {
      if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]);
    }
  }
  if (o_nnz) {
    for (i=0; i<B->m/bs; i++) {
      if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]);
    }
  }
  B->preallocated = PETSC_TRUE;
  ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr);
  ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr);
  ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
  ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->cmap);CHKERRQ(ierr);

  b   = (Mat_MPISBAIJ*)B->data;
  mbs = B->m/bs;
  Mbs = B->M/bs;
  if (mbs*bs != B->m) {
    SETERRQ2(PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->m,bs);
  }

  B->bs  = bs;
  b->bs2 = bs*bs;
  b->mbs = mbs;
  b->nbs = mbs;
  b->Mbs = Mbs;
  b->Nbs = Mbs;

  ierr = MPI_Allgather(&b->mbs,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
  b->rowners[0] = 0;
  for (i=2; i<=b->size; i++) {
    b->rowners[i] += b->rowners[i-1];
  }
  b->rstart = b->rowners[b->rank];
  b->rend   = b->rowners[b->rank+1];
  b->cstart = b->rstart;
  b->cend   = b->rend;
  for (i=0; i<=b->size; i++) {
    b->rowners_bs[i] = b->rowners[i]*bs;
  }
  b->rstart_bs = b->rstart*bs;
  b->rend_bs   = b->rend*bs;

  b->cstart_bs = b->cstart*bs;
  b->cend_bs   = b->cend*bs;

  ierr = MatCreate(PETSC_COMM_SELF,B->m,B->m,B->m,B->m,&b->A);CHKERRQ(ierr);
  ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
  ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);

  ierr = MatCreate(PETSC_COMM_SELF,B->m,B->M,B->m,B->M,&b->B);CHKERRQ(ierr);
  ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
  ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END

/*MC
   MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
   based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.

   Options Database Keys:
. -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPISBAIJ
M*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPISBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPISBAIJ(Mat B)
{
  Mat_MPISBAIJ   *b;
  PetscErrorCode ierr;
  PetscTruth     flg;

  PetscFunctionBegin;

  ierr    = PetscNew(Mat_MPISBAIJ,&b);CHKERRQ(ierr);
  B->data = (void*)b;
  ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  B->ops->destroy = MatDestroy_MPISBAIJ;
  B->ops->view    = MatView_MPISBAIJ;
  B->mapping      = 0;
  B->factor       = 0;
  B->assembled    = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  ierr = PetscMalloc(3*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr);
  b->cowners    = b->rowners + b->size + 2;
  b->rowners_bs = b->cowners + b->size + 2;
  ierr = PetscLogObjectMemory(B,3*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ));CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash  = PETSC_FALSE;
  b->colmap      = PETSC_NULL;
  b->garray      = PETSC_NULL;
  b->roworiented = PETSC_TRUE;

#if defined(PETSC_USE_MAT_SINGLE)
  /* stuff for MatSetValues_XXX in single precision */
  b->setvalueslen  = 0;
  b->setvaluescopy = PETSC_NULL;
#endif

  /* stuff used in block assembly */
  b->barray = 0;

  /* stuff used for matrix vector multiply */
  b->lvec    = 0;
  b->Mvctx   = 0;
  b->slvec0  = 0;
  b->slvec0b = 0;
  b->slvec1  = 0;
  b->slvec1a = 0;
  b->slvec1b = 0;
  b->sMvctx  = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = 0;
  b->hd           = 0;
  b->ht_size      = 0;
  b->ht_flag      = PETSC_FALSE;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  ierr = PetscOptionsHasName(B->prefix,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
  if (flg) {
    PetscReal fact = 1.39;
    ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
    ierr = PetscOptionsGetReal(B->prefix,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
    if (fact <= 1.0) fact = 1.39;
    ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
    ierr = PetscLogInfo((0,"MatCreateMPISBAIJ:Hash table Factor used %5.2f\n",fact));CHKERRQ(ierr);
  }
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                           "MatStoreValues_MPISBAIJ",
                                           MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                           "MatRetrieveValues_MPISBAIJ",
                                           MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
                                           "MatGetDiagonalBlock_MPISBAIJ",
                                           MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
                                           "MatMPISBAIJSetPreallocation_MPISBAIJ",
                                           MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
  B->symmetric                  = PETSC_TRUE;
  B->structurally_symmetric     = PETSC_TRUE;
  B->symmetric_set              = PETSC_TRUE;
  B->structurally_symmetric_set = PETSC_TRUE;
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*MC
   MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.

   This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
   and MATMPISBAIJ otherwise.

   Options Database Keys:
. -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
M*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_SBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SBAIJ(Mat A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = PetscObjectChangeTypeName((PetscObject)A,MATSBAIJ);CHKERRQ(ierr);
  ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
  if (size == 1) {
    ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMPISBAIJSetPreallocation"
/*@C
   MatMPISBAIJSetPreallocation - For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on Mat

   Input Parameters:
+  A - the matrix
.  bs - size of block
.  d_nz - number of block nonzeros per block row in the diagonal portion of the local
          submatrix (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the upper triangular and diagonal part of the diagonal portion of the local
           submatrix (possibly different for each block row), or PETSC_NULL.  You must leave
           room for the diagonal entry even if it is zero.
.  o_nz - number of block nonzeros per block row in the off-diagonal portion of the local
          submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row), or PETSC_NULL.

   Options Database Keys:
.   -mat_no_unroll - uses code that does not unroll the loops in the
                     block calculations (much slower)
.   -mat_block_size - size of the blocks to use

   Notes:

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   If the *_nnz parameter is given then the *_nz parameter is ignored.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation.
   Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix.  In
   the figure below we depict these three local rows and all columns (0-11).

.vb
            0 1 2 3 4 5 6 7 8 9 10 11
           -------------------
   row 3  | o o o d d d o o o o o  o
   row 4  | o o o d d d o o o o o  o
   row 5  | o o o d d d o o o o o  o
           -------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d submatrix is stored in
   MATSEQSBAIJ format and the o submatrix in MATSEQBAIJ format.

   Now d_nz should indicate the number of block nonzeros per row in the upper triangular
   plus the diagonal part of the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.  For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

   Level: intermediate

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPISBAIJ"
/*@C
   MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
   (block compressed row).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs - size of block
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
       This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz - number of block nonzeros per block row in diagonal portion of local
          submatrix (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the upper triangular portion of the diagonal portion of the local
           submatrix (possibly different for each block row), or PETSC_NULL.
           You must leave room for the diagonal entry even if it is zero.
.  o_nz - number of block nonzeros per block row in the off-diagonal portion of local
          submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row), or PETSC_NULL.

   Output Parameter:
.  A - the matrix

   Options Database Keys:
.   -mat_no_unroll - uses code that does not unroll the loops in the
                     block calculations (much slower)
.   -mat_block_size - size of the blocks to use
.   -mat_mpi - use the parallel matrix data structures even on one processor
               (defaults to using SeqSBAIJ format on one processor)

   Notes:
   The number of rows and columns must be divisible by blocksize.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   If the *_nnz parameter is given then the *_nz parameter is ignored.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix.  In
   the figure below we depict these three local rows and all columns (0-11).

.vb
            0 1 2 3 4 5 6 7 8 9 10 11
           -------------------
   row 3  | o o o d d d o o o o o  o
   row 4  | o o o d d d o o o o o  o
   row 5  | o o o d d d o o o o o  o
           -------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d submatrix is stored in
   MATSEQSBAIJ format and the o submatrix in MATSEQBAIJ format.

   Now d_nz should indicate the number of block nonzeros per row in the upper triangular
   plus the diagonal part of the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.  For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.
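
   Example (an illustrative sketch only; the block size, local sizes, and
   nonzero counts below are made-up values chosen for the example, not
   recommendations):
.vb
      Mat            A;
      PetscErrorCode ierr;
      PetscInt       bs = 2, mbs = 2, m = bs*mbs;        /* two local block rows per process */
      PetscInt       d_nnz[2] = {2,1}, o_nnz[2] = {1,1};

      ierr = MatCreateMPISBAIJ(PETSC_COMM_WORLD,bs,m,m,PETSC_DETERMINE,PETSC_DETERMINE,
                               0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
      /* insert entries of the upper triangular part, e.g. with MatSetValuesBlocked(), then assemble */
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
.ve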
1838 1839 Level: intermediate 1840 1841 .keywords: matrix, block, aij, compressed row, sparse, parallel 1842 1843 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1844 @*/ 1845 1846 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPISBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 1847 { 1848 PetscErrorCode ierr; 1849 PetscMPIInt size; 1850 1851 PetscFunctionBegin; 1852 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 1853 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1854 if (size > 1) { 1855 ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr); 1856 ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 1857 } else { 1858 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 1859 ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 1860 } 1861 PetscFunctionReturn(0); 1862 } 1863 1864 1865 #undef __FUNCT__ 1866 #define __FUNCT__ "MatDuplicate_MPISBAIJ" 1867 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1868 { 1869 Mat mat; 1870 Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data; 1871 PetscErrorCode ierr; 1872 PetscInt len=0,nt,bs=matin->bs,mbs=oldmat->mbs; 1873 PetscScalar *array; 1874 1875 PetscFunctionBegin; 1876 *newmat = 0; 1877 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 1878 ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 1879 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1880 1881 mat->factor = matin->factor; 1882 mat->preallocated = PETSC_TRUE; 1883 mat->assembled = PETSC_TRUE; 1884 mat->insertmode = NOT_SET_VALUES; 1885 1886 a = (Mat_MPISBAIJ*)mat->data; 1887 mat->bs = matin->bs; 1888 a->bs2 = oldmat->bs2; 1889 a->mbs = oldmat->mbs; 1890 a->nbs = oldmat->nbs; 1891 a->Mbs = oldmat->Mbs; 1892 a->Nbs = oldmat->Nbs; 1893 1894 a->rstart = oldmat->rstart; 1895 a->rend = oldmat->rend; 1896 a->cstart = oldmat->cstart; 1897 a->cend = oldmat->cend; 1898 a->size = oldmat->size; 1899 a->rank = oldmat->rank; 1900 a->donotstash = oldmat->donotstash; 1901 a->roworiented = oldmat->roworiented; 1902 a->rowindices = 0; 1903 a->rowvalues = 0; 1904 a->getrowactive = PETSC_FALSE; 1905 a->barray = 0; 1906 a->rstart_bs = oldmat->rstart_bs; 1907 a->rend_bs = oldmat->rend_bs; 1908 a->cstart_bs = oldmat->cstart_bs; 1909 a->cend_bs = oldmat->cend_bs; 1910 1911 /* hash table stuff */ 1912 a->ht = 0; 1913 a->hd = 0; 1914 a->ht_size = 0; 1915 a->ht_flag = oldmat->ht_flag; 1916 a->ht_fact = oldmat->ht_fact; 1917 a->ht_total_ct = 0; 1918 a->ht_insert_ct = 0; 1919 1920 ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr); 1921 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1922 ierr = MatStashCreate_Private(matin->comm,matin->bs,&mat->bstash);CHKERRQ(ierr); 1923 if (oldmat->colmap) { 1924 #if defined (PETSC_USE_CTABLE) 1925 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1926 #else 1927 ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 1928 ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 1929 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 1930 #endif 1931 } else a->colmap = 0; 1932 1933 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 1934 ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 1935 ierr = 
  PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
  } else a->garray = 0;

  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);

  ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
  ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);

  ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
  ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
  ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->slvec0b);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->slvec1a);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->slvec1b);CHKERRQ(ierr);

  /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
  ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
  a->sMvctx = oldmat->sMvctx;
  ierr = PetscLogObjectParent(mat,a->sMvctx);CHKERRQ(ierr);

  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
  ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "petscsys.h"

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPISBAIJ"
PetscErrorCode MatLoad_MPISBAIJ(PetscViewer viewer,const MatType type,Mat *newmat)
{
  Mat            A;
  PetscErrorCode ierr;
  PetscInt       i,nz,j,rstart,rend;
  PetscScalar    *vals,*buf;
  MPI_Comm       comm = ((PetscObject)viewer)->comm;
  MPI_Status     status;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners;
  PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
  PetscInt       *locrowlens,*procsnz = 0,jj,*mycols,*ibuf;
  PetscInt       bs=1,Mbs,mbs,extra_rows;
  PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
  PetscInt       dcount,kmax,k,nzcount,tmp;
  int            fd;

  PetscFunctionBegin;
  ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
    if (header[3] < 0) {
SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ"); 2008 } 2009 } 2010 2011 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2012 M = header[1]; N = header[2]; 2013 2014 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2015 2016 /* 2017 This code adds extra rows to make sure the number of rows is 2018 divisible by the blocksize 2019 */ 2020 Mbs = M/bs; 2021 extra_rows = bs - M + bs*(Mbs); 2022 if (extra_rows == bs) extra_rows = 0; 2023 else Mbs++; 2024 if (extra_rows &&!rank) { 2025 ierr = PetscLogInfo((0,"MatLoad_MPISBAIJ:Padding loaded matrix to match blocksize\n"));CHKERRQ(ierr); 2026 } 2027 2028 /* determine ownership of all rows */ 2029 mbs = Mbs/size + ((Mbs % size) > rank); 2030 m = mbs*bs; 2031 ierr = PetscMalloc(2*(size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr); 2032 browners = rowners + size + 1; 2033 ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2034 rowners[0] = 0; 2035 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2036 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2037 rstart = rowners[rank]; 2038 rend = rowners[rank+1]; 2039 2040 /* distribute row lengths to all processors */ 2041 ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr); 2042 if (!rank) { 2043 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2044 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2045 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2046 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr); 2047 for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2048 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2049 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2050 } else { 2051 ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2052 } 2053 2054 if (!rank) { /* procs[0] */ 2055 /* calculate the number of nonzeros on each processor */ 2056 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2057 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2058 for (i=0; i<size; i++) { 2059 for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 2060 procsnz[i] += rowlengths[j]; 2061 } 2062 } 2063 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2064 2065 /* determine max buffer needed and allocate it */ 2066 maxnz = 0; 2067 for (i=0; i<size; i++) { 2068 maxnz = PetscMax(maxnz,procsnz[i]); 2069 } 2070 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2071 2072 /* read in my part of the matrix column indices */ 2073 nz = procsnz[0]; 2074 ierr = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 2075 mycols = ibuf; 2076 if (size == 1) nz -= extra_rows; 2077 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2078 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2079 2080 /* read in every ones (except the last) and ship off */ 2081 for (i=1; i<size-1; i++) { 2082 nz = procsnz[i]; 2083 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2084 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2085 } 2086 /* read in the stuff for the last proc */ 2087 if (size != 1) { 2088 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2089 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2090 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2091 ierr = 
MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 2092 } 2093 ierr = PetscFree(cols);CHKERRQ(ierr); 2094 } else { /* procs[i], i>0 */ 2095 /* determine buffer space needed for message */ 2096 nz = 0; 2097 for (i=0; i<m; i++) { 2098 nz += locrowlens[i]; 2099 } 2100 ierr = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 2101 mycols = ibuf; 2102 /* receive message of column indices*/ 2103 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2104 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2105 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2106 } 2107 2108 /* loop over local rows, determining number of off diagonal entries */ 2109 ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 2110 odlens = dlens + (rend-rstart); 2111 ierr = PetscMalloc(3*Mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr); 2112 ierr = PetscMemzero(mask,3*Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2113 masked1 = mask + Mbs; 2114 masked2 = masked1 + Mbs; 2115 rowcount = 0; nzcount = 0; 2116 for (i=0; i<mbs; i++) { 2117 dcount = 0; 2118 odcount = 0; 2119 for (j=0; j<bs; j++) { 2120 kmax = locrowlens[rowcount]; 2121 for (k=0; k<kmax; k++) { 2122 tmp = mycols[nzcount++]/bs; /* block col. index */ 2123 if (!mask[tmp]) { 2124 mask[tmp] = 1; 2125 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */ 2126 else masked1[dcount++] = tmp; /* entry in diag portion */ 2127 } 2128 } 2129 rowcount++; 2130 } 2131 2132 dlens[i] = dcount; /* d_nzz[i] */ 2133 odlens[i] = odcount; /* o_nzz[i] */ 2134 2135 /* zero out the mask elements we set */ 2136 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2137 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2138 } 2139 2140 /* create our matrix */ 2141 ierr = MatCreate(comm,m,m,PETSC_DETERMINE,PETSC_DETERMINE,&A);CHKERRQ(ierr); 2142 ierr = MatSetType(A,type);CHKERRQ(ierr); 2143 ierr = MatMPISBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr); 2144 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2145 2146 if (!rank) { 2147 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2148 /* read in my part of the matrix numerical values */ 2149 nz = procsnz[0]; 2150 vals = buf; 2151 mycols = ibuf; 2152 if (size == 1) nz -= extra_rows; 2153 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2154 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2155 2156 /* insert into matrix */ 2157 jj = rstart*bs; 2158 for (i=0; i<m; i++) { 2159 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2160 mycols += locrowlens[i]; 2161 vals += locrowlens[i]; 2162 jj++; 2163 } 2164 2165 /* read in other processors (except the last one) and ship out */ 2166 for (i=1; i<size-1; i++) { 2167 nz = procsnz[i]; 2168 vals = buf; 2169 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2170 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2171 } 2172 /* the last proc */ 2173 if (size != 1){ 2174 nz = procsnz[i] - extra_rows; 2175 vals = buf; 2176 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2177 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2178 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 2179 } 2180 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2181 2182 } else { 2183 /* receive numeric values */ 2184 ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2185 2186 /* receive message of values*/ 2187 vals = buf; 2188 mycols = 
ibuf; 2189 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2190 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2191 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2192 2193 /* insert into matrix */ 2194 jj = rstart*bs; 2195 for (i=0; i<m; i++) { 2196 ierr = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2197 mycols += locrowlens[i]; 2198 vals += locrowlens[i]; 2199 jj++; 2200 } 2201 } 2202 2203 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2204 ierr = PetscFree(buf);CHKERRQ(ierr); 2205 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2206 ierr = PetscFree(rowners);CHKERRQ(ierr); 2207 ierr = PetscFree(dlens);CHKERRQ(ierr); 2208 ierr = PetscFree(mask);CHKERRQ(ierr); 2209 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2210 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2211 *newmat = A; 2212 PetscFunctionReturn(0); 2213 } 2214 2215 #undef __FUNCT__ 2216 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor" 2217 /*XXXXX@ 2218 MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2219 2220 Input Parameters: 2221 . mat - the matrix 2222 . fact - factor 2223 2224 Collective on Mat 2225 2226 Level: advanced 2227 2228 Notes: 2229 This can also be set by the command line option: -mat_use_hash_table fact 2230 2231 .keywords: matrix, hashtable, factor, HT 2232 2233 .seealso: MatSetOption() 2234 @XXXXX*/ 2235 2236 2237 #undef __FUNCT__ 2238 #define __FUNCT__ "MatGetRowMax_MPISBAIJ" 2239 PetscErrorCode MatGetRowMax_MPISBAIJ(Mat A,Vec v) 2240 { 2241 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 2242 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(a->B)->data; 2243 PetscReal atmp; 2244 PetscReal *work,*svalues,*rvalues; 2245 PetscErrorCode ierr; 2246 PetscInt i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol; 2247 PetscMPIInt rank,size; 2248 PetscInt *rowners_bs,dest,count,source; 2249 PetscScalar *va; 2250 MatScalar *ba; 2251 MPI_Status stat; 2252 2253 PetscFunctionBegin; 2254 ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr); 2255 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2256 2257 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 2258 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 2259 2260 bs = A->bs; 2261 mbs = a->mbs; 2262 Mbs = a->Mbs; 2263 ba = b->a; 2264 bi = b->i; 2265 bj = b->j; 2266 2267 /* find ownerships */ 2268 rowners_bs = a->rowners_bs; 2269 2270 /* each proc creates an array to be distributed */ 2271 ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr); 2272 ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr); 2273 2274 /* row_max for B */ 2275 if (rank != size-1){ 2276 for (i=0; i<mbs; i++) { 2277 ncols = bi[1] - bi[0]; bi++; 2278 brow = bs*i; 2279 for (j=0; j<ncols; j++){ 2280 bcol = bs*(*bj); 2281 for (kcol=0; kcol<bs; kcol++){ 2282 col = bcol + kcol; /* local col index */ 2283 col += rowners_bs[rank+1]; /* global col index */ 2284 for (krow=0; krow<bs; krow++){ 2285 atmp = PetscAbsScalar(*ba); ba++; 2286 row = brow + krow; /* local row index */ 2287 if (PetscRealPart(va[row]) < atmp) va[row] = atmp; 2288 if (work[col] < atmp) work[col] = atmp; 2289 } 2290 } 2291 bj++; 2292 } 2293 } 2294 2295 /* send values to its owners */ 2296 for (dest=rank+1; dest<size; dest++){ 2297 svalues = work + rowners_bs[dest]; 2298 count = rowners_bs[dest+1]-rowners_bs[dest]; 2299 ierr = MPI_Send(svalues,count,MPIU_REAL,dest,rank,A->comm);CHKERRQ(ierr); 2300 } 2301 } 2302 2303 /* receive values */ 2304 if (rank){ 2305 rvalues = 
work; 2306 count = rowners_bs[rank+1]-rowners_bs[rank]; 2307 for (source=0; source<rank; source++){ 2308 ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,A->comm,&stat);CHKERRQ(ierr); 2309 /* process values */ 2310 for (i=0; i<count; i++){ 2311 if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i]; 2312 } 2313 } 2314 } 2315 2316 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2317 ierr = PetscFree(work);CHKERRQ(ierr); 2318 PetscFunctionReturn(0); 2319 } 2320 2321 #undef __FUNCT__ 2322 #define __FUNCT__ "MatRelax_MPISBAIJ" 2323 PetscErrorCode MatRelax_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2324 { 2325 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2326 PetscErrorCode ierr; 2327 PetscInt mbs=mat->mbs,bs=matin->bs; 2328 PetscScalar mone=-1.0,*x,*b,*ptr,zero=0.0; 2329 Vec bb1; 2330 2331 PetscFunctionBegin; 2332 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 2333 if (bs > 1) 2334 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2335 2336 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2337 if ( flag & SOR_ZERO_INITIAL_GUESS ) { 2338 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2339 its--; 2340 } 2341 2342 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2343 while (its--){ 2344 2345 /* lower triangular part: slvec0b = - B^T*xx */ 2346 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2347 2348 /* copy xx into slvec0a */ 2349 ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2350 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2351 ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2352 ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2353 2354 ierr = VecScale(&mone,mat->slvec0);CHKERRQ(ierr); 2355 2356 /* copy bb into slvec1a */ 2357 ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2358 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 2359 ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2360 ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2361 2362 /* set slvec1b = 0 */ 2363 ierr = VecSet(&zero,mat->slvec1b);CHKERRQ(ierr); 2364 2365 ierr = VecScatterBegin(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr); 2366 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2367 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 2368 ierr = VecScatterEnd(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr); 2369 2370 /* upper triangular part: bb1 = bb1 - B*x */ 2371 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr); 2372 2373 /* local diagonal sweep */ 2374 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2375 } 2376 ierr = VecDestroy(bb1);CHKERRQ(ierr); 2377 } else { 2378 SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2379 } 2380 PetscFunctionReturn(0); 2381 } 2382 2383 #undef __FUNCT__ 2384 #define __FUNCT__ "MatRelax_MPISBAIJ_2comm" 2385 PetscErrorCode MatRelax_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2386 { 2387 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2388 PetscErrorCode ierr; 2389 PetscScalar mone=-1.0; 2390 Vec lvec1,bb1; 2391 2392 PetscFunctionBegin; 2393 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D 
and local its %D both positive",its,lits); 2394 if (matin->bs > 1) 2395 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2396 2397 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2398 if ( flag & SOR_ZERO_INITIAL_GUESS ) { 2399 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2400 its--; 2401 } 2402 2403 ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr); 2404 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2405 while (its--){ 2406 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 2407 2408 /* lower diagonal part: bb1 = bb - B^T*xx */ 2409 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr); 2410 ierr = VecScale(&mone,lvec1);CHKERRQ(ierr); 2411 2412 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 2413 ierr = VecCopy(bb,bb1);CHKERRQ(ierr); 2414 ierr = VecScatterBegin(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr); 2415 2416 /* upper diagonal part: bb1 = bb1 - B*x */ 2417 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 2418 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 2419 2420 ierr = VecScatterEnd(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr); 2421 2422 /* diagonal sweep */ 2423 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2424 } 2425 ierr = VecDestroy(lvec1);CHKERRQ(ierr); 2426 ierr = VecDestroy(bb1);CHKERRQ(ierr); 2427 } else { 2428 SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2429 } 2430 PetscFunctionReturn(0); 2431 } 2432 2433
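
/*
   Illustrative sketch only (kept out of the build with #if 0): how the SSOR
   kernels above are normally reached through the public interface.  The
   routine name ExampleLocalSSOR is made up for this sketch; it assumes an
   assembled MATMPISBAIJ matrix A with block size 1, conforming vectors b and
   x, and that the public MatRelax() entry point of this PETSc version takes
   the same argument list as the A->ops->relax kernels above.
*/
#if 0
static PetscErrorCode ExampleLocalSSOR(Mat A,Vec b,Vec x)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* one symmetric local sweep with omega = 1.0 and a zero initial guess;
     this dispatches to MatRelax_MPISBAIJ() through A->ops->relax */
  ierr = MatRelax(A,b,1.0,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),0.0,1,1,x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif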