1 #define PETSCMAT_DLL 2 3 #include "src/mat/impls/baij/mpi/mpibaij.h" /*I "petscmat.h" I*/ 4 #include "mpisbaij.h" 5 #include "src/mat/impls/sbaij/seq/sbaij.h" 6 7 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat); 8 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat); 9 EXTERN PetscErrorCode DisAssemble_MPISBAIJ(Mat); 10 EXTERN PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt); 11 EXTERN PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []); 12 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []); 13 EXTERN PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode); 14 EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 15 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 16 EXTERN PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**); 17 EXTERN PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**); 18 EXTERN PetscErrorCode MatPrintHelp_SeqSBAIJ(Mat); 19 EXTERN PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*); 20 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *); 21 EXTERN PetscErrorCode MatGetRowMax_MPISBAIJ(Mat,Vec); 22 EXTERN PetscErrorCode MatRelax_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec); 23 24 /* UGLY, ugly, ugly 25 When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does 26 not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and 27 inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ() 28 converts the entries into single precision and then calls ..._MatScalar() to put them 29 into the single precision data structures. 
30 */ 31 #if defined(PETSC_USE_MAT_SINGLE) 32 EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode); 33 EXTERN PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode); 34 EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode); 35 EXTERN PetscErrorCode MatSetValues_MPISBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode); 36 EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode); 37 #else 38 #define MatSetValuesBlocked_SeqSBAIJ_MatScalar MatSetValuesBlocked_SeqSBAIJ 39 #define MatSetValues_MPISBAIJ_MatScalar MatSetValues_MPISBAIJ 40 #define MatSetValuesBlocked_MPISBAIJ_MatScalar MatSetValuesBlocked_MPISBAIJ 41 #define MatSetValues_MPISBAIJ_HT_MatScalar MatSetValues_MPISBAIJ_HT 42 #define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar MatSetValuesBlocked_MPISBAIJ_HT 43 #endif 44 45 EXTERN_C_BEGIN 46 #undef __FUNCT__ 47 #define __FUNCT__ "MatStoreValues_MPISBAIJ" 48 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPISBAIJ(Mat mat) 49 { 50 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 51 PetscErrorCode ierr; 52 53 PetscFunctionBegin; 54 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 55 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 56 PetscFunctionReturn(0); 57 } 58 EXTERN_C_END 59 60 EXTERN_C_BEGIN 61 #undef __FUNCT__ 62 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ" 63 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPISBAIJ(Mat mat) 64 { 65 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 66 PetscErrorCode ierr; 67 68 PetscFunctionBegin; 69 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 70 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 71 PetscFunctionReturn(0); 72 } 73 EXTERN_C_END 74 75 76 #define CHUNKSIZE 10 77 78 #define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \ 79 { \ 80 \ 81 brow = row/bs; \ 82 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 83 rmax = aimax[brow]; nrow = ailen[brow]; \ 84 bcol = col/bs; \ 85 ridx = row % bs; cidx = col % bs; \ 86 low = 0; high = nrow; \ 87 while (high-low > 3) { \ 88 t = (low+high)/2; \ 89 if (rp[t] > bcol) high = t; \ 90 else low = t; \ 91 } \ 92 for (_i=low; _i<high; _i++) { \ 93 if (rp[_i] > bcol) break; \ 94 if (rp[_i] == bcol) { \ 95 bap = ap + bs2*_i + bs*cidx + ridx; \ 96 if (addv == ADD_VALUES) *bap += value; \ 97 else *bap = value; \ 98 goto a_noinsert; \ 99 } \ 100 } \ 101 if (a->nonew == 1) goto a_noinsert; \ 102 if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 103 MatSeqXAIJReallocateAIJ(a,bs2,nrow,brow,bcol,rmax,aa,ai,aj,a->mbs,rp,ap,aimax,a->nonew); \ 104 N = nrow++ - 1; \ 105 /* shift up all the later entries in this row */ \ 106 for (ii=N; ii>=_i; ii--) { \ 107 rp[ii+1] = rp[ii]; \ 108 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 109 } \ 110 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 111 rp[_i] = bcol; \ 112 ap[bs2*_i + bs*cidx + ridx] = value; \ 113 a_noinsert:; \ 114 ailen[brow] = nrow; \ 115 } 116 #ifndef MatSetValues_SeqBAIJ_B_Private 117 #define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \ 118 { \ 119 brow = row/bs; \ 120 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 
121 rmax = bimax[brow]; nrow = bilen[brow]; \ 122 bcol = col/bs; \ 123 ridx = row % bs; cidx = col % bs; \ 124 low = 0; high = nrow; \ 125 while (high-low > 3) { \ 126 t = (low+high)/2; \ 127 if (rp[t] > bcol) high = t; \ 128 else low = t; \ 129 } \ 130 for (_i=low; _i<high; _i++) { \ 131 if (rp[_i] > bcol) break; \ 132 if (rp[_i] == bcol) { \ 133 bap = ap + bs2*_i + bs*cidx + ridx; \ 134 if (addv == ADD_VALUES) *bap += value; \ 135 else *bap = value; \ 136 goto b_noinsert; \ 137 } \ 138 } \ 139 if (b->nonew == 1) goto b_noinsert; \ 140 if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 141 MatSeqXAIJReallocateAIJ(b,bs2,nrow,brow,bcol,rmax,ba,bi,bj,b->mbs,rp,ap,bimax,b->nonew); \ 142 N = nrow++ - 1; \ 143 /* shift up all the later entries in this row */ \ 144 for (ii=N; ii>=_i; ii--) { \ 145 rp[ii+1] = rp[ii]; \ 146 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 147 } \ 148 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 149 rp[_i] = bcol; \ 150 ap[bs2*_i + bs*cidx + ridx] = value; \ 151 b_noinsert:; \ 152 bilen[brow] = nrow; \ 153 } 154 #endif 155 156 #if defined(PETSC_USE_MAT_SINGLE) 157 #undef __FUNCT__ 158 #define __FUNCT__ "MatSetValues_MPISBAIJ" 159 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 160 { 161 Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data; 162 PetscErrorCode ierr; 163 PetscInt i,N = m*n; 164 MatScalar *vsingle; 165 166 PetscFunctionBegin; 167 if (N > b->setvalueslen) { 168 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 169 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 170 b->setvalueslen = N; 171 } 172 vsingle = b->setvaluescopy; 173 174 for (i=0; i<N; i++) { 175 vsingle[i] = v[i]; 176 } 177 ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 178 PetscFunctionReturn(0); 179 } 180 181 #undef __FUNCT__ 182 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ" 183 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 184 { 185 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 186 PetscErrorCode ierr; 187 PetscInt i,N = m*n*b->bs2; 188 MatScalar *vsingle; 189 190 PetscFunctionBegin; 191 if (N > b->setvalueslen) { 192 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 193 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 194 b->setvalueslen = N; 195 } 196 vsingle = b->setvaluescopy; 197 for (i=0; i<N; i++) { 198 vsingle[i] = v[i]; 199 } 200 ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 201 PetscFunctionReturn(0); 202 } 203 #endif 204 205 /* Only add/insert a(i,j) with i<=j (blocks). 206 Any a(i,j) with i>j input by user is ingored. 
207 */ 208 #undef __FUNCT__ 209 #define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar" 210 PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 211 { 212 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 213 MatScalar value; 214 PetscTruth roworiented = baij->roworiented; 215 PetscErrorCode ierr; 216 PetscInt i,j,row,col; 217 PetscInt rstart_orig=baij->rstart_bs; 218 PetscInt rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs; 219 PetscInt cend_orig=baij->cend_bs,bs=mat->bs; 220 221 /* Some Variables required in the macro */ 222 Mat A = baij->A; 223 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data; 224 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 225 MatScalar *aa=a->a; 226 227 Mat B = baij->B; 228 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 229 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 230 MatScalar *ba=b->a; 231 232 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 233 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 234 MatScalar *ap,*bap; 235 236 /* for stash */ 237 PetscInt n_loc, *in_loc=0; 238 MatScalar *v_loc=0; 239 240 PetscFunctionBegin; 241 242 if(!baij->donotstash){ 243 ierr = PetscMalloc(n*sizeof(PetscInt),&in_loc);CHKERRQ(ierr); 244 ierr = PetscMalloc(n*sizeof(MatScalar),&v_loc);CHKERRQ(ierr); 245 } 246 247 for (i=0; i<m; i++) { 248 if (im[i] < 0) continue; 249 #if defined(PETSC_USE_DEBUG) 250 if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1); 251 #endif 252 if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */ 253 row = im[i] - rstart_orig; /* local row index */ 254 for (j=0; j<n; j++) { 255 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 256 if (in[j] >= cstart_orig && in[j] < cend_orig){ /* diag entry (A) */ 257 col = in[j] - cstart_orig; /* local col index */ 258 brow = row/bs; bcol = col/bs; 259 if (brow > bcol) continue; /* ignore lower triangular blocks of A */ 260 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 261 MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv); 262 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 263 } else if (in[j] < 0) continue; 264 #if defined(PETSC_USE_DEBUG) 265 else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);} 266 #endif 267 else { /* off-diag entry (B) */ 268 if (mat->was_assembled) { 269 if (!baij->colmap) { 270 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 271 } 272 #if defined (PETSC_USE_CTABLE) 273 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 274 col = col - 1; 275 #else 276 col = baij->colmap[in[j]/bs] - 1; 277 #endif 278 if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) { 279 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 280 col = in[j]; 281 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 282 B = baij->B; 283 b = (Mat_SeqBAIJ*)(B)->data; 284 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 285 ba=b->a; 286 } else col += in[j]%bs; 287 } else col = in[j]; 288 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 289 MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv); 290 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 291 } 292 } 293 } else { /* off processor entry */ 294 if (!baij->donotstash) { 295 n_loc = 0; 296 for (j=0; j<n; j++){ 297 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 298 
in_loc[n_loc] = in[j]; 299 if (roworiented) { 300 v_loc[n_loc] = v[i*n+j]; 301 } else { 302 v_loc[n_loc] = v[j*m+i]; 303 } 304 n_loc++; 305 } 306 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr); 307 } 308 } 309 } 310 311 if(!baij->donotstash){ 312 ierr = PetscFree(in_loc);CHKERRQ(ierr); 313 ierr = PetscFree(v_loc);CHKERRQ(ierr); 314 } 315 PetscFunctionReturn(0); 316 } 317 318 #undef __FUNCT__ 319 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_MatScalar" 320 PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 321 { 322 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 323 const MatScalar *value; 324 MatScalar *barray=baij->barray; 325 PetscTruth roworiented = baij->roworiented; 326 PetscErrorCode ierr; 327 PetscInt i,j,ii,jj,row,col,rstart=baij->rstart; 328 PetscInt rend=baij->rend,cstart=baij->cstart,stepval; 329 PetscInt cend=baij->cend,bs=mat->bs,bs2=baij->bs2; 330 331 PetscFunctionBegin; 332 if(!barray) { 333 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 334 baij->barray = barray; 335 } 336 337 if (roworiented) { 338 stepval = (n-1)*bs; 339 } else { 340 stepval = (m-1)*bs; 341 } 342 for (i=0; i<m; i++) { 343 if (im[i] < 0) continue; 344 #if defined(PETSC_USE_DEBUG) 345 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 346 #endif 347 if (im[i] >= rstart && im[i] < rend) { 348 row = im[i] - rstart; 349 for (j=0; j<n; j++) { 350 /* If NumCol = 1 then a copy is not required */ 351 if ((roworiented) && (n == 1)) { 352 barray = (MatScalar*) v + i*bs2; 353 } else if((!roworiented) && (m == 1)) { 354 barray = (MatScalar*) v + j*bs2; 355 } else { /* Here a copy is required */ 356 if (roworiented) { 357 value = v + i*(stepval+bs)*bs + j*bs; 358 } else { 359 value = v + j*(stepval+bs)*bs + i*bs; 360 } 361 for (ii=0; ii<bs; ii++,value+=stepval) { 362 for (jj=0; jj<bs; jj++) { 363 *barray++ = *value++; 364 } 365 } 366 barray -=bs2; 367 } 368 369 if (in[j] >= cstart && in[j] < cend){ 370 col = in[j] - cstart; 371 ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 372 } 373 else if (in[j] < 0) continue; 374 #if defined(PETSC_USE_DEBUG) 375 else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);} 376 #endif 377 else { 378 if (mat->was_assembled) { 379 if (!baij->colmap) { 380 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 381 } 382 383 #if defined(PETSC_USE_DEBUG) 384 #if defined (PETSC_USE_CTABLE) 385 { PetscInt data; 386 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 387 if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 388 } 389 #else 390 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 391 #endif 392 #endif 393 #if defined (PETSC_USE_CTABLE) 394 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 395 col = (col - 1)/bs; 396 #else 397 col = (baij->colmap[in[j]] - 1)/bs; 398 #endif 399 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 400 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 401 col = in[j]; 402 } 403 } 404 else col = in[j]; 405 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 406 } 407 } 408 } else { 409 if (!baij->donotstash) { 410 if (roworiented) { 411 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 412 } 
else { 413 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 414 } 415 } 416 } 417 } 418 PetscFunctionReturn(0); 419 } 420 421 #undef __FUNCT__ 422 #define __FUNCT__ "MatGetValues_MPISBAIJ" 423 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 424 { 425 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 426 PetscErrorCode ierr; 427 PetscInt bs=mat->bs,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs; 428 PetscInt bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data; 429 430 PetscFunctionBegin; 431 for (i=0; i<m; i++) { 432 if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); 433 if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1); 434 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 435 row = idxm[i] - bsrstart; 436 for (j=0; j<n; j++) { 437 if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); 438 if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1); 439 if (idxn[j] >= bscstart && idxn[j] < bscend){ 440 col = idxn[j] - bscstart; 441 ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 442 } else { 443 if (!baij->colmap) { 444 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 445 } 446 #if defined (PETSC_USE_CTABLE) 447 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 448 data --; 449 #else 450 data = baij->colmap[idxn[j]/bs]-1; 451 #endif 452 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 453 else { 454 col = data + idxn[j]%bs; 455 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 456 } 457 } 458 } 459 } else { 460 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 461 } 462 } 463 PetscFunctionReturn(0); 464 } 465 466 #undef __FUNCT__ 467 #define __FUNCT__ "MatNorm_MPISBAIJ" 468 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm) 469 { 470 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 471 PetscErrorCode ierr; 472 PetscReal sum[2],*lnorm2; 473 474 PetscFunctionBegin; 475 if (baij->size == 1) { 476 ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr); 477 } else { 478 if (type == NORM_FROBENIUS) { 479 ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr); 480 ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr); 481 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++; /* squar power of norm(A) */ 482 ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr); 483 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--; /* squar power of norm(B) */ 484 ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 485 *norm = sqrt(sum[0] + 2*sum[1]); 486 ierr = PetscFree(lnorm2);CHKERRQ(ierr); 487 } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */ 488 Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data; 489 Mat_SeqBAIJ *bmat=(Mat_SeqBAIJ*)baij->B->data; 490 PetscReal *rsum,*rsum2,vabs; 491 PetscInt *jj,*garray=baij->garray,rstart=baij->rstart,nz; 492 PetscInt brow,bcol,col,bs=baij->A->bs,row,grow,gcol,mbs=amat->mbs; 493 MatScalar *v; 494 495 ierr = PetscMalloc((2*mat->N+1)*sizeof(PetscReal),&rsum);CHKERRQ(ierr); 496 rsum2 = rsum + mat->N; 497 ierr = PetscMemzero(rsum,mat->N*sizeof(PetscReal));CHKERRQ(ierr); 498 /* Amat */ 499 v = amat->a; jj = amat->j; 500 for (brow=0; brow<mbs; brow++) { 501 grow = bs*(rstart + brow); 502 nz = amat->i[brow+1] - amat->i[brow]; 503 for 
(bcol=0; bcol<nz; bcol++){ 504 gcol = bs*(rstart + *jj); jj++; 505 for (col=0; col<bs; col++){ 506 for (row=0; row<bs; row++){ 507 vabs = PetscAbsScalar(*v); v++; 508 rsum[gcol+col] += vabs; 509 /* non-diagonal block */ 510 if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs; 511 } 512 } 513 } 514 } 515 /* Bmat */ 516 v = bmat->a; jj = bmat->j; 517 for (brow=0; brow<mbs; brow++) { 518 grow = bs*(rstart + brow); 519 nz = bmat->i[brow+1] - bmat->i[brow]; 520 for (bcol=0; bcol<nz; bcol++){ 521 gcol = bs*garray[*jj]; jj++; 522 for (col=0; col<bs; col++){ 523 for (row=0; row<bs; row++){ 524 vabs = PetscAbsScalar(*v); v++; 525 rsum[gcol+col] += vabs; 526 rsum[grow+row] += vabs; 527 } 528 } 529 } 530 } 531 ierr = MPI_Allreduce(rsum,rsum2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 532 *norm = 0.0; 533 for (col=0; col<mat->N; col++) { 534 if (rsum2[col] > *norm) *norm = rsum2[col]; 535 } 536 ierr = PetscFree(rsum);CHKERRQ(ierr); 537 } else { 538 SETERRQ(PETSC_ERR_SUP,"No support for this norm yet"); 539 } 540 } 541 PetscFunctionReturn(0); 542 } 543 544 #undef __FUNCT__ 545 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ" 546 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode) 547 { 548 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 549 PetscErrorCode ierr; 550 PetscInt nstash,reallocs; 551 InsertMode addv; 552 553 PetscFunctionBegin; 554 if (baij->donotstash) { 555 PetscFunctionReturn(0); 556 } 557 558 /* make sure all processors are either in INSERTMODE or ADDMODE */ 559 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 560 if (addv == (ADD_VALUES|INSERT_VALUES)) { 561 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 562 } 563 mat->insertmode = addv; /* in case this processor had no cache */ 564 565 ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr); 566 ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr); 567 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 568 ierr = PetscLogInfo((0,"MatAssemblyBegin_MPISBAIJ:Stash has %D entries,uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr); 569 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 570 ierr = PetscLogInfo((0,"MatAssemblyBegin_MPISBAIJ:Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr); 571 PetscFunctionReturn(0); 572 } 573 574 #undef __FUNCT__ 575 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ" 576 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode) 577 { 578 Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data; 579 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)baij->A->data; 580 Mat_SeqBAIJ *b=(Mat_SeqBAIJ*)baij->B->data; 581 PetscErrorCode ierr; 582 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 583 PetscInt *row,*col,other_disassembled; 584 PetscMPIInt n; 585 PetscTruth r1,r2,r3; 586 MatScalar *val; 587 InsertMode addv = mat->insertmode; 588 589 PetscFunctionBegin; 590 591 if (!baij->donotstash) { 592 while (1) { 593 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 594 if (!flg) break; 595 596 for (i=0; i<n;) { 597 /* Now identify the consecutive vals belonging to the same row */ 598 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 599 if (j < n) ncols = j-i; 600 else ncols = n-i; 601 /* Now assemble all these values with a single function call */ 602 ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 603 i = j; 604 } 605 } 606 ierr = 
MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 607 /* Now process the block-stash. Since the values are stashed column-oriented, 608 set the roworiented flag to column oriented, and after MatSetValues() 609 restore the original flags */ 610 r1 = baij->roworiented; 611 r2 = a->roworiented; 612 r3 = b->roworiented; 613 baij->roworiented = PETSC_FALSE; 614 a->roworiented = PETSC_FALSE; 615 b->roworiented = PETSC_FALSE; 616 while (1) { 617 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 618 if (!flg) break; 619 620 for (i=0; i<n;) { 621 /* Now identify the consecutive vals belonging to the same row */ 622 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 623 if (j < n) ncols = j-i; 624 else ncols = n-i; 625 ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 626 i = j; 627 } 628 } 629 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 630 baij->roworiented = r1; 631 a->roworiented = r2; 632 b->roworiented = r3; 633 } 634 635 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 636 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 637 638 /* determine if any processor has disassembled, if so we must 639 also disassemble ourselfs, in order that we may reassemble. */ 640 /* 641 if nonzero structure of submatrix B cannot change then we know that 642 no processor disassembled thus we can skip this stuff 643 */ 644 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 645 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 646 if (mat->was_assembled && !other_disassembled) { 647 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 648 } 649 } 650 651 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 652 ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */ 653 } 654 b->compressedrow.use = PETSC_TRUE; 655 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 656 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 657 658 if (baij->rowvalues) { 659 ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr); 660 baij->rowvalues = 0; 661 } 662 663 PetscFunctionReturn(0); 664 } 665 666 #undef __FUNCT__ 667 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket" 668 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 669 { 670 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 671 PetscErrorCode ierr; 672 PetscInt bs = mat->bs; 673 PetscMPIInt size = baij->size,rank = baij->rank; 674 PetscTruth iascii,isdraw; 675 PetscViewer sviewer; 676 PetscViewerFormat format; 677 678 PetscFunctionBegin; 679 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 680 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 681 if (iascii) { 682 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 683 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 684 MatInfo info; 685 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 686 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 687 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n", 688 rank,mat->m,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs, 689 mat->bs,(PetscInt)info.memory);CHKERRQ(ierr); 690 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 691 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr); 692 ierr = 
MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 693 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr); 694 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 695 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 696 PetscFunctionReturn(0); 697 } else if (format == PETSC_VIEWER_ASCII_INFO) { 698 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 699 PetscFunctionReturn(0); 700 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 701 PetscFunctionReturn(0); 702 } 703 } 704 705 if (isdraw) { 706 PetscDraw draw; 707 PetscTruth isnull; 708 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 709 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 710 } 711 712 if (size == 1) { 713 ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr); 714 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 715 } else { 716 /* assemble the entire matrix onto first processor. */ 717 Mat A; 718 Mat_SeqSBAIJ *Aloc; 719 Mat_SeqBAIJ *Bloc; 720 PetscInt M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 721 MatScalar *a; 722 723 /* Should this be the same type as mat? */ 724 ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr); 725 if (!rank) { 726 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 727 } else { 728 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 729 } 730 ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 731 ierr = MatMPISBAIJSetPreallocation(A,mat->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 732 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 733 734 /* copy over the A part */ 735 Aloc = (Mat_SeqSBAIJ*)baij->A->data; 736 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 737 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 738 739 for (i=0; i<mbs; i++) { 740 rvals[0] = bs*(baij->rstart + i); 741 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 742 for (j=ai[i]; j<ai[i+1]; j++) { 743 col = (baij->cstart+aj[j])*bs; 744 for (k=0; k<bs; k++) { 745 ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 746 col++; a += bs; 747 } 748 } 749 } 750 /* copy over the B part */ 751 Bloc = (Mat_SeqBAIJ*)baij->B->data; 752 ai = Bloc->i; aj = Bloc->j; a = Bloc->a; 753 for (i=0; i<mbs; i++) { 754 rvals[0] = bs*(baij->rstart + i); 755 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 756 for (j=ai[i]; j<ai[i+1]; j++) { 757 col = baij->garray[aj[j]]*bs; 758 for (k=0; k<bs; k++) { 759 ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 760 col++; a += bs; 761 } 762 } 763 } 764 ierr = PetscFree(rvals);CHKERRQ(ierr); 765 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 766 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 767 /* 768 Everyone has to call to draw the matrix since the graphics waits are 769 synchronized across all processors that share the PetscDraw object 770 */ 771 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 772 if (!rank) { 773 ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 774 ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 775 } 776 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 777 ierr = MatDestroy(A);CHKERRQ(ierr); 778 } 779 PetscFunctionReturn(0); 780 } 781 782 #undef __FUNCT__ 783 #define __FUNCT__ "MatView_MPISBAIJ" 784 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer) 785 { 786 PetscErrorCode ierr; 787 PetscTruth 
iascii,isdraw,issocket,isbinary; 788 789 PetscFunctionBegin; 790 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 791 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 792 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 793 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 794 if (iascii || isdraw || issocket || isbinary) { 795 ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 796 } else { 797 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name); 798 } 799 PetscFunctionReturn(0); 800 } 801 802 #undef __FUNCT__ 803 #define __FUNCT__ "MatDestroy_MPISBAIJ" 804 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat) 805 { 806 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 807 PetscErrorCode ierr; 808 809 PetscFunctionBegin; 810 #if defined(PETSC_USE_LOG) 811 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->M,mat->N); 812 #endif 813 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 814 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 815 ierr = PetscFree(baij->rowners);CHKERRQ(ierr); 816 ierr = MatDestroy(baij->A);CHKERRQ(ierr); 817 ierr = MatDestroy(baij->B);CHKERRQ(ierr); 818 #if defined (PETSC_USE_CTABLE) 819 if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);} 820 #else 821 if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);} 822 #endif 823 if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);} 824 if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 825 if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 826 if (baij->slvec0) { 827 ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr); 828 ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr); 829 } 830 if (baij->slvec1) { 831 ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr); 832 ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr); 833 ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr); 834 } 835 if (baij->sMvctx) {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);} 836 if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);} 837 if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);} 838 if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);} 839 #if defined(PETSC_USE_MAT_SINGLE) 840 if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);} 841 #endif 842 ierr = PetscFree(baij);CHKERRQ(ierr); 843 844 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 845 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 846 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 847 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 848 PetscFunctionReturn(0); 849 } 850 851 #undef __FUNCT__ 852 #define __FUNCT__ "MatMult_MPISBAIJ" 853 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy) 854 { 855 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 856 PetscErrorCode ierr; 857 PetscInt nt,mbs=a->mbs,bs=A->bs; 858 PetscScalar *x,*from,zero=0.0; 859 860 PetscFunctionBegin; 861 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 862 if (nt != A->n) { 863 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 864 } 865 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 866 if (nt != A->m) { 867 
SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 868 } 869 870 /* diagonal part */ 871 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 872 ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr); 873 874 /* subdiagonal part */ 875 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 876 877 /* copy x into the vec slvec0 */ 878 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 879 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 880 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 881 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 882 883 ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr); 884 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 885 ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr); 886 887 /* supperdiagonal part */ 888 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 889 890 PetscFunctionReturn(0); 891 } 892 893 #undef __FUNCT__ 894 #define __FUNCT__ "MatMult_MPISBAIJ_2comm" 895 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy) 896 { 897 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 898 PetscErrorCode ierr; 899 PetscInt nt; 900 901 PetscFunctionBegin; 902 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 903 if (nt != A->n) { 904 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 905 } 906 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 907 if (nt != A->m) { 908 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 909 } 910 911 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 912 /* do diagonal part */ 913 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 914 /* do supperdiagonal part */ 915 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 916 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 917 /* do subdiagonal part */ 918 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 919 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 920 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 921 922 PetscFunctionReturn(0); 923 } 924 925 #undef __FUNCT__ 926 #define __FUNCT__ "MatMultAdd_MPISBAIJ" 927 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 928 { 929 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 930 PetscErrorCode ierr; 931 PetscInt mbs=a->mbs,bs=A->bs; 932 PetscScalar *x,*from,zero=0.0; 933 934 PetscFunctionBegin; 935 /* 936 PetscSynchronizedPrintf(A->comm," MatMultAdd is called ...\n"); 937 PetscSynchronizedFlush(A->comm); 938 */ 939 /* diagonal part */ 940 ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr); 941 ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr); 942 943 /* subdiagonal part */ 944 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 945 946 /* copy x into the vec slvec0 */ 947 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 948 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 949 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 950 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 951 952 ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr); 953 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 954 ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr); 955 956 /* supperdiagonal part */ 957 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr); 958 959 
PetscFunctionReturn(0); 960 } 961 962 #undef __FUNCT__ 963 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm" 964 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz) 965 { 966 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 967 PetscErrorCode ierr; 968 969 PetscFunctionBegin; 970 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 971 /* do diagonal part */ 972 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 973 /* do supperdiagonal part */ 974 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 975 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 976 977 /* do subdiagonal part */ 978 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 979 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 980 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 981 982 PetscFunctionReturn(0); 983 } 984 985 /* 986 This only works correctly for square matrices where the subblock A->A is the 987 diagonal block 988 */ 989 #undef __FUNCT__ 990 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ" 991 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v) 992 { 993 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 994 PetscErrorCode ierr; 995 996 PetscFunctionBegin; 997 /* if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */ 998 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 999 PetscFunctionReturn(0); 1000 } 1001 1002 #undef __FUNCT__ 1003 #define __FUNCT__ "MatScale_MPISBAIJ" 1004 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa) 1005 { 1006 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1007 PetscErrorCode ierr; 1008 1009 PetscFunctionBegin; 1010 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1011 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1012 PetscFunctionReturn(0); 1013 } 1014 1015 #undef __FUNCT__ 1016 #define __FUNCT__ "MatGetRow_MPISBAIJ" 1017 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1018 { 1019 PetscFunctionBegin; 1020 if (matin) SETERRQ(PETSC_ERR_SUP,"MatGetRow is not supported for SBAIJ matrix format"); 1021 PetscFunctionReturn(0); 1022 } 1023 1024 #undef __FUNCT__ 1025 #define __FUNCT__ "MatRestoreRow_MPISBAIJ" 1026 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1027 { 1028 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1029 1030 PetscFunctionBegin; 1031 if (!baij->getrowactive) { 1032 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1033 } 1034 baij->getrowactive = PETSC_FALSE; 1035 PetscFunctionReturn(0); 1036 } 1037 1038 #undef __FUNCT__ 1039 #define __FUNCT__ "MatRealPart_MPISBAIJ" 1040 PetscErrorCode MatRealPart_MPISBAIJ(Mat A) 1041 { 1042 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1043 PetscErrorCode ierr; 1044 1045 PetscFunctionBegin; 1046 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1047 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1048 PetscFunctionReturn(0); 1049 } 1050 1051 #undef __FUNCT__ 1052 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ" 1053 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A) 1054 { 1055 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1056 PetscErrorCode ierr; 1057 1058 PetscFunctionBegin; 1059 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1060 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1061 PetscFunctionReturn(0); 1062 } 1063 1064 #undef __FUNCT__ 1065 #define __FUNCT__ "MatZeroEntries_MPISBAIJ" 1066 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A) 1067 { 
1068 Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data; 1069 PetscErrorCode ierr; 1070 1071 PetscFunctionBegin; 1072 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1073 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1074 PetscFunctionReturn(0); 1075 } 1076 1077 #undef __FUNCT__ 1078 #define __FUNCT__ "MatGetInfo_MPISBAIJ" 1079 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1080 { 1081 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data; 1082 Mat A = a->A,B = a->B; 1083 PetscErrorCode ierr; 1084 PetscReal isend[5],irecv[5]; 1085 1086 PetscFunctionBegin; 1087 info->block_size = (PetscReal)matin->bs; 1088 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1089 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1090 isend[3] = info->memory; isend[4] = info->mallocs; 1091 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1092 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1093 isend[3] += info->memory; isend[4] += info->mallocs; 1094 if (flag == MAT_LOCAL) { 1095 info->nz_used = isend[0]; 1096 info->nz_allocated = isend[1]; 1097 info->nz_unneeded = isend[2]; 1098 info->memory = isend[3]; 1099 info->mallocs = isend[4]; 1100 } else if (flag == MAT_GLOBAL_MAX) { 1101 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1102 info->nz_used = irecv[0]; 1103 info->nz_allocated = irecv[1]; 1104 info->nz_unneeded = irecv[2]; 1105 info->memory = irecv[3]; 1106 info->mallocs = irecv[4]; 1107 } else if (flag == MAT_GLOBAL_SUM) { 1108 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1109 info->nz_used = irecv[0]; 1110 info->nz_allocated = irecv[1]; 1111 info->nz_unneeded = irecv[2]; 1112 info->memory = irecv[3]; 1113 info->mallocs = irecv[4]; 1114 } else { 1115 SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1116 } 1117 info->rows_global = (PetscReal)A->M; 1118 info->columns_global = (PetscReal)A->N; 1119 info->rows_local = (PetscReal)A->m; 1120 info->columns_local = (PetscReal)A->N; 1121 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1122 info->fill_ratio_needed = 0; 1123 info->factor_mallocs = 0; 1124 PetscFunctionReturn(0); 1125 } 1126 1127 #undef __FUNCT__ 1128 #define __FUNCT__ "MatSetOption_MPISBAIJ" 1129 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op) 1130 { 1131 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1132 PetscErrorCode ierr; 1133 1134 PetscFunctionBegin; 1135 switch (op) { 1136 case MAT_NO_NEW_NONZERO_LOCATIONS: 1137 case MAT_YES_NEW_NONZERO_LOCATIONS: 1138 case MAT_COLUMNS_UNSORTED: 1139 case MAT_COLUMNS_SORTED: 1140 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1141 case MAT_KEEP_ZEROED_ROWS: 1142 case MAT_NEW_NONZERO_LOCATION_ERR: 1143 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1144 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1145 break; 1146 case MAT_ROW_ORIENTED: 1147 a->roworiented = PETSC_TRUE; 1148 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1149 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1150 break; 1151 case MAT_ROWS_SORTED: 1152 case MAT_ROWS_UNSORTED: 1153 case MAT_YES_NEW_DIAGONALS: 1154 ierr = PetscLogInfo((A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"));CHKERRQ(ierr); 1155 break; 1156 case MAT_COLUMN_ORIENTED: 1157 a->roworiented = PETSC_FALSE; 1158 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1159 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1160 break; 1161 case MAT_IGNORE_OFF_PROC_ENTRIES: 1162 a->donotstash = PETSC_TRUE; 1163 break; 1164 case MAT_NO_NEW_DIAGONALS: 1165 
SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1166 case MAT_USE_HASH_TABLE: 1167 a->ht_flag = PETSC_TRUE; 1168 break; 1169 case MAT_NOT_SYMMETRIC: 1170 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 1171 case MAT_HERMITIAN: 1172 SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric"); 1173 case MAT_SYMMETRIC: 1174 case MAT_STRUCTURALLY_SYMMETRIC: 1175 case MAT_NOT_HERMITIAN: 1176 case MAT_SYMMETRY_ETERNAL: 1177 case MAT_NOT_SYMMETRY_ETERNAL: 1178 break; 1179 default: 1180 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1181 } 1182 PetscFunctionReturn(0); 1183 } 1184 1185 #undef __FUNCT__ 1186 #define __FUNCT__ "MatTranspose_MPISBAIJ" 1187 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,Mat *B) 1188 { 1189 PetscErrorCode ierr; 1190 PetscFunctionBegin; 1191 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 1192 PetscFunctionReturn(0); 1193 } 1194 1195 #undef __FUNCT__ 1196 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ" 1197 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr) 1198 { 1199 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1200 Mat a=baij->A, b=baij->B; 1201 PetscErrorCode ierr; 1202 PetscInt nv,m,n; 1203 PetscTruth flg; 1204 1205 PetscFunctionBegin; 1206 if (ll != rr){ 1207 ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr); 1208 if (!flg) 1209 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n"); 1210 } 1211 if (!ll) PetscFunctionReturn(0); 1212 1213 ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr); 1214 if (m != n) SETERRQ2(PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n); 1215 1216 ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr); 1217 if (nv!=n) SETERRQ(PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size"); 1218 1219 ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1220 1221 /* left diagonalscale the off-diagonal part */ 1222 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1223 1224 /* scale the diagonal part */ 1225 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1226 1227 /* right diagonalscale the off-diagonal part */ 1228 ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1229 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1230 PetscFunctionReturn(0); 1231 } 1232 1233 #undef __FUNCT__ 1234 #define __FUNCT__ "MatPrintHelp_MPISBAIJ" 1235 PetscErrorCode MatPrintHelp_MPISBAIJ(Mat A) 1236 { 1237 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1238 MPI_Comm comm = A->comm; 1239 static PetscTruth called = PETSC_FALSE; 1240 PetscErrorCode ierr; 1241 1242 PetscFunctionBegin; 1243 if (!a->rank) { 1244 ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr); 1245 } 1246 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1247 ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr); 1248 ierr = (*PetscHelpPrintf)(comm," -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr); 1249 PetscFunctionReturn(0); 1250 } 1251 1252 #undef __FUNCT__ 1253 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ" 1254 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A) 1255 { 1256 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1257 PetscErrorCode ierr; 1258 1259 PetscFunctionBegin; 1260 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1261 PetscFunctionReturn(0); 1262 } 1263 1264 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *); 1265 1266 #undef __FUNCT__ 1267 #define __FUNCT__ "MatEqual_MPISBAIJ" 1268 
PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag) 1269 { 1270 Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data; 1271 Mat a,b,c,d; 1272 PetscTruth flg; 1273 PetscErrorCode ierr; 1274 1275 PetscFunctionBegin; 1276 a = matA->A; b = matA->B; 1277 c = matB->A; d = matB->B; 1278 1279 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1280 if (flg) { 1281 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1282 } 1283 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1284 PetscFunctionReturn(0); 1285 } 1286 1287 #undef __FUNCT__ 1288 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ" 1289 PetscErrorCode MatSetUpPreallocation_MPISBAIJ(Mat A) 1290 { 1291 PetscErrorCode ierr; 1292 1293 PetscFunctionBegin; 1294 ierr = MatMPISBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1295 PetscFunctionReturn(0); 1296 } 1297 1298 #undef __FUNCT__ 1299 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ" 1300 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[]) 1301 { 1302 PetscErrorCode ierr; 1303 PetscInt i; 1304 PetscTruth flg; 1305 1306 PetscFunctionBegin; 1307 for (i=0; i<n; i++) { 1308 ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr); 1309 if (!flg) { 1310 SETERRQ(PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices"); 1311 } 1312 } 1313 ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr); 1314 PetscFunctionReturn(0); 1315 } 1316 1317 1318 /* -------------------------------------------------------------------*/ 1319 static struct _MatOps MatOps_Values = { 1320 MatSetValues_MPISBAIJ, 1321 MatGetRow_MPISBAIJ, 1322 MatRestoreRow_MPISBAIJ, 1323 MatMult_MPISBAIJ, 1324 /* 4*/ MatMultAdd_MPISBAIJ, 1325 MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */ 1326 MatMultAdd_MPISBAIJ, 1327 0, 1328 0, 1329 0, 1330 /*10*/ 0, 1331 0, 1332 0, 1333 MatRelax_MPISBAIJ, 1334 MatTranspose_MPISBAIJ, 1335 /*15*/ MatGetInfo_MPISBAIJ, 1336 MatEqual_MPISBAIJ, 1337 MatGetDiagonal_MPISBAIJ, 1338 MatDiagonalScale_MPISBAIJ, 1339 MatNorm_MPISBAIJ, 1340 /*20*/ MatAssemblyBegin_MPISBAIJ, 1341 MatAssemblyEnd_MPISBAIJ, 1342 0, 1343 MatSetOption_MPISBAIJ, 1344 MatZeroEntries_MPISBAIJ, 1345 /*25*/ 0, 1346 0, 1347 0, 1348 0, 1349 0, 1350 /*30*/ MatSetUpPreallocation_MPISBAIJ, 1351 0, 1352 0, 1353 0, 1354 0, 1355 /*35*/ MatDuplicate_MPISBAIJ, 1356 0, 1357 0, 1358 0, 1359 0, 1360 /*40*/ 0, 1361 MatGetSubMatrices_MPISBAIJ, 1362 MatIncreaseOverlap_MPISBAIJ, 1363 MatGetValues_MPISBAIJ, 1364 0, 1365 /*45*/ MatPrintHelp_MPISBAIJ, 1366 MatScale_MPISBAIJ, 1367 0, 1368 0, 1369 0, 1370 /*50*/ 0, 1371 0, 1372 0, 1373 0, 1374 0, 1375 /*55*/ 0, 1376 0, 1377 MatSetUnfactored_MPISBAIJ, 1378 0, 1379 MatSetValuesBlocked_MPISBAIJ, 1380 /*60*/ 0, 1381 0, 1382 0, 1383 MatGetPetscMaps_Petsc, 1384 0, 1385 /*65*/ 0, 1386 0, 1387 0, 1388 0, 1389 0, 1390 /*70*/ MatGetRowMax_MPISBAIJ, 1391 0, 1392 0, 1393 0, 1394 0, 1395 /*75*/ 0, 1396 0, 1397 0, 1398 0, 1399 0, 1400 /*80*/ 0, 1401 0, 1402 0, 1403 0, 1404 MatLoad_MPISBAIJ, 1405 /*85*/ 0, 1406 0, 1407 0, 1408 0, 1409 0, 1410 /*90*/ 0, 1411 0, 1412 0, 1413 0, 1414 0, 1415 /*95*/ 0, 1416 0, 1417 0, 1418 0, 1419 0, 1420 /*100*/0, 1421 0, 1422 0, 1423 0, 1424 0, 1425 /*105*/0, 1426 MatRealPart_MPISBAIJ, 1427 MatImaginaryPart_MPISBAIJ 1428 }; 1429 1430 1431 EXTERN_C_BEGIN 1432 #undef __FUNCT__ 1433 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ" 1434 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse 
reuse,Mat *a) 1435 { 1436 PetscFunctionBegin; 1437 *a = ((Mat_MPISBAIJ *)A->data)->A; 1438 *iscopy = PETSC_FALSE; 1439 PetscFunctionReturn(0); 1440 } 1441 EXTERN_C_END 1442 1443 EXTERN_C_BEGIN 1444 #undef __FUNCT__ 1445 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ" 1446 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz) 1447 { 1448 Mat_MPISBAIJ *b; 1449 PetscErrorCode ierr; 1450 PetscInt i,mbs,Mbs; 1451 1452 PetscFunctionBegin; 1453 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1454 1455 if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive"); 1456 if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3; 1457 if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1; 1458 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 1459 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 1460 if (d_nnz) { 1461 for (i=0; i<B->m/bs; i++) { 1462 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 1463 } 1464 } 1465 if (o_nnz) { 1466 for (i=0; i<B->m/bs; i++) { 1467 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 1468 } 1469 } 1470 B->preallocated = PETSC_TRUE; 1471 ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr); 1472 ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr); 1473 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1474 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->cmap);CHKERRQ(ierr); 1475 1476 b = (Mat_MPISBAIJ*)B->data; 1477 mbs = B->m/bs; 1478 Mbs = B->M/bs; 1479 if (mbs*bs != B->m) { 1480 SETERRQ2(PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->m,bs); 1481 } 1482 1483 B->bs = bs; 1484 b->bs2 = bs*bs; 1485 b->mbs = mbs; 1486 b->nbs = mbs; 1487 b->Mbs = Mbs; 1488 b->Nbs = Mbs; 1489 1490 ierr = MPI_Allgather(&b->mbs,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr); 1491 b->rowners[0] = 0; 1492 for (i=2; i<=b->size; i++) { 1493 b->rowners[i] += b->rowners[i-1]; 1494 } 1495 b->rstart = b->rowners[b->rank]; 1496 b->rend = b->rowners[b->rank+1]; 1497 b->cstart = b->rstart; 1498 b->cend = b->rend; 1499 for (i=0; i<=b->size; i++) { 1500 b->rowners_bs[i] = b->rowners[i]*bs; 1501 } 1502 b->rstart_bs = b-> rstart*bs; 1503 b->rend_bs = b->rend*bs; 1504 1505 b->cstart_bs = b->cstart*bs; 1506 b->cend_bs = b->cend*bs; 1507 1508 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 1509 ierr = MatSetSizes(b->A,B->m,B->m,B->m,B->m);CHKERRQ(ierr); 1510 ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr); 1511 ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 1512 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 1513 1514 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 1515 ierr = MatSetSizes(b->B,B->m,B->M,B->m,B->M);CHKERRQ(ierr); 1516 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 1517 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 1518 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 1519 1520 /* build cache for off array entries formed */ 1521 ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr); 1522 1523 PetscFunctionReturn(0); 1524 } 1525 EXTERN_C_END 1526 1527 /*MC 1528 MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to 
be used for distributed symmetric sparse block matrices, 1529 based on block compressed sparse row format. Only the upper triangular portion of the matrix is stored. 1530 1531 Options Database Keys: 1532 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions() 1533 1534 Level: beginner 1535 1536 .seealso: MatCreateMPISBAIJ 1537 M*/ 1538 1539 EXTERN_C_BEGIN 1540 #undef __FUNCT__ 1541 #define __FUNCT__ "MatCreate_MPISBAIJ" 1542 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPISBAIJ(Mat B) 1543 { 1544 Mat_MPISBAIJ *b; 1545 PetscErrorCode ierr; 1546 PetscTruth flg; 1547 1548 PetscFunctionBegin; 1549 1550 ierr = PetscNew(Mat_MPISBAIJ,&b);CHKERRQ(ierr); 1551 B->data = (void*)b; 1552 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1553 1554 B->ops->destroy = MatDestroy_MPISBAIJ; 1555 B->ops->view = MatView_MPISBAIJ; 1556 B->mapping = 0; 1557 B->factor = 0; 1558 B->assembled = PETSC_FALSE; 1559 1560 B->insertmode = NOT_SET_VALUES; 1561 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 1562 ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr); 1563 1564 /* build local table of row and column ownerships */ 1565 ierr = PetscMalloc(3*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr); 1566 b->cowners = b->rowners + b->size + 2; 1567 b->rowners_bs = b->cowners + b->size + 2; 1568 ierr = PetscLogObjectMemory(B,3*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ));CHKERRQ(ierr); 1569 1570 /* build cache for off array entries formed */ 1571 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 1572 b->donotstash = PETSC_FALSE; 1573 b->colmap = PETSC_NULL; 1574 b->garray = PETSC_NULL; 1575 b->roworiented = PETSC_TRUE; 1576 1577 #if defined(PETSC_USE_MAT_SINGLE) 1578 /* stuff for MatSetValues_XXX in single precision */ 1579 b->setvalueslen = 0; 1580 b->setvaluescopy = PETSC_NULL; 1581 #endif 1582 1583 /* stuff used in block assembly */ 1584 b->barray = 0; 1585 1586 /* stuff used for matrix vector multiply */ 1587 b->lvec = 0; 1588 b->Mvctx = 0; 1589 b->slvec0 = 0; 1590 b->slvec0b = 0; 1591 b->slvec1 = 0; 1592 b->slvec1a = 0; 1593 b->slvec1b = 0; 1594 b->sMvctx = 0; 1595 1596 /* stuff for MatGetRow() */ 1597 b->rowindices = 0; 1598 b->rowvalues = 0; 1599 b->getrowactive = PETSC_FALSE; 1600 1601 /* hash table stuff */ 1602 b->ht = 0; 1603 b->hd = 0; 1604 b->ht_size = 0; 1605 b->ht_flag = PETSC_FALSE; 1606 b->ht_fact = 0; 1607 b->ht_total_ct = 0; 1608 b->ht_insert_ct = 0; 1609 1610 ierr = PetscOptionsHasName(B->prefix,"-mat_use_hash_table",&flg);CHKERRQ(ierr); 1611 if (flg) { 1612 PetscReal fact = 1.39; 1613 ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr); 1614 ierr = PetscOptionsGetReal(B->prefix,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr); 1615 if (fact <= 1.0) fact = 1.39; 1616 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 1617 ierr = PetscLogInfo((0,"MatCreateMPISBAIJ:Hash table Factor used %5.2f\n",fact));CHKERRQ(ierr); 1618 } 1619 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1620 "MatStoreValues_MPISBAIJ", 1621 MatStoreValues_MPISBAIJ);CHKERRQ(ierr); 1622 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1623 "MatRetrieveValues_MPISBAIJ", 1624 MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr); 1625 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 1626 "MatGetDiagonalBlock_MPISBAIJ", 1627 MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr); 1628 ierr = 

/*MC
   MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.

   This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
   and MATMPISBAIJ otherwise.

   Options Database Keys:
. -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
M*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_SBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SBAIJ(Mat A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = PetscObjectChangeTypeName((PetscObject)A,MATSBAIJ);CHKERRQ(ierr);
  ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
  if (size == 1) {
    ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END
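
/*
   Illustrative usage sketch added for exposition; it is not part of the original
   PETSc source.  Because MatCreate_SBAIJ() above only dispatches on the
   communicator size, the same calling sequence yields a MATSEQSBAIJ matrix on one
   process and a MATMPISBAIJ matrix in parallel.  Both preallocation calls are
   made; the one that does not match the actual type is expected to be a no-op,
   since it is resolved through PetscObjectQueryFunction() as in
   MatMPISBAIJSetPreallocation() below.  The routine name, sizes and nonzero
   counts are hypothetical.
*/
#undef __FUNCT__
#define __FUNCT__ "MatSBAIJUsageSketch_CommIndependent"
static PetscErrorCode MatSBAIJUsageSketch_CommIndependent(MPI_Comm comm,PetscInt M,Mat *newA)
{
  Mat            A;
  PetscErrorCode ierr;
  PetscInt       bs = 2;                        /* hypothetical block size */

  PetscFunctionBegin;
  ierr = MatCreate(comm,&A);CHKERRQ(ierr);
  ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,M);CHKERRQ(ierr);
  ierr = MatSetType(A,MATSBAIJ);CHKERRQ(ierr);  /* becomes "seqsbaij" or "mpisbaij" */
  ierr = MatSeqSBAIJSetPreallocation(A,bs,3,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatMPISBAIJSetPreallocation(A,bs,3,PETSC_NULL,1,PETSC_NULL);CHKERRQ(ierr);
  *newA = A;
  PetscFunctionReturn(0);
}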

#undef __FUNCT__
#define __FUNCT__ "MatMPISBAIJSetPreallocation"
/*@C
   MatMPISBAIJSetPreallocation - For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters 
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on Mat

   Input Parameters:
+  A - the matrix 
.  bs - size of block
.  d_nz - number of block nonzeros per block row in the diagonal portion of the local 
          submatrix (same for all local block rows)
.  d_nnz - array containing the number of block nonzeros in the upper triangular and
           diagonal part of each block row of the diagonal portion of the local submatrix
           (possibly different for each block row), or PETSC_NULL.  You must leave room 
           for the diagonal entry even if it is zero.
.  o_nz - number of block nonzeros per block row in the off-diagonal portion of the local
          submatrix (same for all local block rows).
-  o_nnz - array containing the number of block nonzeros in each block row of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row), or PETSC_NULL.

   Options Database Keys:
.   -mat_no_unroll - uses code that does not unroll the loops in the 
                     block calculations (much slower)
.   -mat_block_size - size of the blocks to use

   Notes:
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   If the *_nnz parameter is given then the *_nz parameter is ignored.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion 
   to be its local rows and the corresponding columns (a square submatrix);  
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix). 

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set 
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix.  In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          -------------------
   row 3  | o o o d d d o o o o o o
   row 4  | o o o d d d o o o o o o
   row 5  | o o o d d d o o o o o o
          -------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal) 
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d matrix is stored in
   MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.

   Now d_nz should indicate the number of block nonzeros per row in the upper triangular
   plus the diagonal part of the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.  For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

   Level: intermediate

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
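
/*
   Illustrative preallocation sketch added for exposition; it is not part of the
   original PETSc source.  It fills d_nnz/o_nnz for a block-tridiagonal matrix in
   which each block row couples only to itself and to its immediate neighbour.
   Because only the upper triangle is stored, each block row needs at most the
   diagonal block plus one block to its right; for the last local block row that
   right neighbour (if any) falls in the off-diagonal portion.  The routine name
   and its arguments are hypothetical.
*/
#undef __FUNCT__
#define __FUNCT__ "MatMPISBAIJUsageSketch_Preallocate"
static PetscErrorCode MatMPISBAIJUsageSketch_Preallocate(Mat A,PetscInt bs,PetscInt mlocal,PetscTruth lastproc)
{
  PetscErrorCode ierr;
  PetscInt       i,mbs = mlocal/bs,*d_nnz,*o_nnz;

  PetscFunctionBegin;
  ierr  = PetscMalloc(2*mbs*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
  o_nnz = d_nnz + mbs;
  for (i=0; i<mbs; i++) {
    d_nnz[i] = 2;                     /* diagonal block + upper neighbour block */
    o_nnz[i] = 0;
  }
  if (mbs) {
    d_nnz[mbs-1] = 1;                 /* last local block row: only the diagonal block ... */
    o_nnz[mbs-1] = lastproc ? 0 : 1;  /* ... unless its neighbour is owned by the next process */
  }
  ierr = MatMPISBAIJSetPreallocation(A,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
  ierr = PetscFree(d_nnz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}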

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPISBAIJ"
/*@C
   MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
   (block compressed row).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters 
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs - size of block
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
       This value should be the same as the local size used in creating the 
       y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
       This value should be the same as the local size used in creating the 
       x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  d_nz - number of block nonzeros per block row in the diagonal portion of the local 
          submatrix (same for all local block rows)
.  d_nnz - array containing the number of block nonzeros in the upper triangular part of
           each block row of the diagonal portion of the local submatrix
           (possibly different for each block row), or PETSC_NULL.
           You must leave room for the diagonal entry even if it is zero.
.  o_nz - number of block nonzeros per block row in the off-diagonal portion of the local
          submatrix (same for all local block rows).
-  o_nnz - array containing the number of block nonzeros in each block row of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row), or PETSC_NULL.

   Output Parameter:
.  A - the matrix 

   Options Database Keys:
.   -mat_no_unroll - uses code that does not unroll the loops in the 
                     block calculations (much slower)
.   -mat_block_size - size of the blocks to use
.   -mat_mpi - use the parallel matrix data structures even on one processor 
               (defaults to using SeqSBAIJ format on one processor)

   Notes:
   The number of rows and columns must be divisible by blocksize.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   If the *_nnz parameter is given then the *_nz parameter is ignored.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion 
   to be its local rows and the corresponding columns (a square submatrix);  
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix). 

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set 
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix.  In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          -------------------
   row 3  | o o o d d d o o o o o o
   row 4  | o o o d d d o o o o o o
   row 5  | o o o d d d o o o o o o
          -------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal) 
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d matrix is stored in
   MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.

   Now d_nz should indicate the number of block nonzeros per row in the upper triangular
   plus the diagonal part of the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.  For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

   Level: intermediate

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPISBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
    ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
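
/*
   Illustrative assembly sketch added for exposition; it is not part of the
   original PETSc source.  It builds a small block-tridiagonal SBAIJ matrix with
   MatCreateMPISBAIJ(); since only the upper triangle is stored, each process
   inserts just the diagonal entry and the entry to its right.  The 1x1 block
   size, the routine name and the values used are hypothetical.
*/
#undef __FUNCT__
#define __FUNCT__ "MatMPISBAIJUsageSketch_Assemble"
static PetscErrorCode MatMPISBAIJUsageSketch_Assemble(MPI_Comm comm,PetscInt M,Mat *newA)
{
  Mat            A;
  PetscErrorCode ierr;
  PetscInt       i,rstart,rend,col[2];
  PetscScalar    v[2];

  PetscFunctionBegin;
  ierr = MatCreateMPISBAIJ(comm,1,PETSC_DECIDE,PETSC_DECIDE,M,M,2,PETSC_NULL,1,PETSC_NULL,&A);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  for (i=rstart; i<rend; i++) {
    col[0] = i;   v[0] = 2.0;     /* diagonal entry */
    col[1] = i+1; v[1] = -1.0;    /* upper neighbour, skipped for the last global row */
    ierr = MatSetValues(A,1,&i,(i < M-1) ? 2 : 1,col,v,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newA = A;
  PetscFunctionReturn(0);
}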

#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPISBAIJ"
static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
  PetscErrorCode ierr;
  PetscInt       len=0,nt,bs=matin->bs,mbs=oldmat->mbs;
  PetscScalar    *array;

  PetscFunctionBegin;
  *newmat = 0;
  ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,matin->m,matin->n,matin->M,matin->N);CHKERRQ(ierr);
  ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
  ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->factor       = matin->factor;
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;

  a       = (Mat_MPISBAIJ*)mat->data;
  mat->bs = matin->bs;
  a->bs2  = oldmat->bs2;
  a->mbs  = oldmat->mbs;
  a->nbs  = oldmat->nbs;
  a->Mbs  = oldmat->Mbs;
  a->Nbs  = oldmat->Nbs;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->cstart       = oldmat->cstart;
  a->cend         = oldmat->cend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;
  a->barray       = 0;
  a->rstart_bs    = oldmat->rstart_bs;
  a->rend_bs      = oldmat->rend_bs;
  a->cstart_bs    = oldmat->cstart_bs;
  a->cend_bs      = oldmat->cend_bs;

  /* hash table stuff */
  a->ht           = 0;
  a->hd           = 0;
  a->ht_size      = 0;
  a->ht_flag      = oldmat->ht_flag;
  a->ht_fact      = oldmat->ht_fact;
  a->ht_total_ct  = 0;
  a->ht_insert_ct = 0;

  ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
  ierr = MatStashCreate_Private(matin->comm,matin->bs,&mat->bstash);CHKERRQ(ierr);
  if (oldmat->colmap) {
#if defined (PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;

  if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
    ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
  } else a->garray = 0;

  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);

  ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
  ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);

  ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
  ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
  ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->slvec0b);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->slvec1a);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->slvec1b);CHKERRQ(ierr);

  /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
  ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
  a->sMvctx = oldmat->sMvctx;
  ierr = PetscLogObjectParent(mat,a->sMvctx);CHKERRQ(ierr);

  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
  ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "petscsys.h"

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPISBAIJ"
PetscErrorCode MatLoad_MPISBAIJ(PetscViewer viewer,MatType type,Mat *newmat)
{
  Mat            A;
  PetscErrorCode ierr;
  PetscInt       i,nz,j,rstart,rend;
  PetscScalar    *vals,*buf;
  MPI_Comm       comm = ((PetscObject)viewer)->comm;
  MPI_Status     status;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,*locrowlens;
  PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
  PetscInt       *procsnz = 0,jj,*mycols,*ibuf;
  PetscInt       bs=1,Mbs,mbs,extra_rows;
  PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
  PetscInt       dcount,kmax,k,nzcount,tmp;
  int            fd;

  PetscFunctionBegin;
  ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object");
    if (header[3] < 0) {
      SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
    }
  }

  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];

  if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");

  /* 
     This code adds extra rows to make sure the number of rows is 
     divisible by the blocksize
  */
  Mbs        = M/bs;
  extra_rows = bs - M + bs*Mbs;
  if (extra_rows == bs) extra_rows = 0;
  else                  Mbs++;
  if (extra_rows && !rank) {
    ierr = PetscLogInfo((0,"MatLoad_MPISBAIJ:Padding loaded matrix to match blocksize\n"));CHKERRQ(ierr);
  }

  /* determine ownership of all rows */
  mbs  = Mbs/size + ((Mbs % size) > rank);
  m    = mbs*bs;
  ierr = PetscMalloc(2*(size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr);
  browners = rowners + size + 1;
  ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
  for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscMPIInt),&locrowlens);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
    ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {   /* procs[0] */
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]*bs; j<rowners[i+1]*bs; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz     = procsnz[0];
    ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
    mycols = ibuf;
    if (size == 1) nz -= extra_rows;
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
    if (size == 1) for (i=0; i<extra_rows; i++) { mycols[nz+i] = M+i; }

    /* read in the parts for all the other processors (except the last) and ship them off */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    /* read in the stuff for the last proc */
    if (size != 1) {
      nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
      ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {  /* procs[i], i>0 */
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += locrowlens[i];
    }
    ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
    mycols = ibuf;
    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* loop over local rows, determining the number of diagonal and off-diagonal block entries */
  ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
  odlens = dlens + (rend-rstart);
  ierr = PetscMalloc(3*Mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
  ierr = PetscMemzero(mask,3*Mbs*sizeof(PetscInt));CHKERRQ(ierr);
  masked1  = mask    + Mbs;
  masked2  = masked1 + Mbs;
  rowcount = 0; nzcount = 0;
  for (i=0; i<mbs; i++) {
    dcount  = 0;
    odcount = 0;
    for (j=0; j<bs; j++) {
      kmax = locrowlens[rowcount];
      for (k=0; k<kmax; k++) {
        tmp = mycols[nzcount++]/bs; /* block col. index */
        if (!mask[tmp]) {
          mask[tmp] = 1;
          if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diagonal portion */
          else masked1[dcount++] = tmp;                              /* entry in diagonal portion */
        }
      }
      rowcount++;
    }

    dlens[i]  = dcount;  /* d_nnz[i] */
    odlens[i] = odcount; /* o_nnz[i] */

    /* zero out the mask elements we set */
    for (j=0; j<dcount; j++)  mask[masked1[j]] = 0;
    for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
  }

  /* create our matrix */
  ierr = MatCreate(comm,&A);CHKERRQ(ierr);
  ierr = MatSetSizes(A,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetType(A,type);CHKERRQ(ierr);
  ierr = MatMPISBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
  ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);

  if (!rank) {
    ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
    /* read in my part of the matrix numerical values */
    nz     = procsnz[0];
    vals   = buf;
    mycols = ibuf;
    if (size == 1) nz -= extra_rows;
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
    if (size == 1) for (i=0; i<extra_rows; i++) { vals[nz+i] = 1.0; }

    /* insert into matrix */
    jj = rstart*bs;
    for (i=0; i<m; i++) {
      ierr    = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }

    /* read in the parts for all the other processors (except the last one) and ship them off */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    /* the last proc */
    if (size != 1) {
      nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
      ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);

    /* receive message of values */
    vals   = buf;
    mycols = ibuf;
    ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj = rstart*bs;
    for (i=0; i<m; i++) {
      ierr    = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
  }

  ierr = PetscFree(locrowlens);CHKERRQ(ierr);
  ierr = PetscFree(buf);CHKERRQ(ierr);
  ierr = PetscFree(ibuf);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = PetscFree(dlens);CHKERRQ(ierr);
  ierr = PetscFree(mask);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = A;
  PetscFunctionReturn(0);
}
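
/*
   Illustrative load sketch added for exposition; it is not part of the original
   PETSc source.  It shows how MatLoad_MPISBAIJ() above is normally reached: the
   user opens a binary viewer and calls the generic MatLoad() with the "mpisbaij"
   type.  The file name is hypothetical, and the viewer calls assume the
   PetscViewerBinaryOpen()/FILE_MODE_READ interface of this PETSc release; the
   block size can be selected at run time with -matload_block_size <bs>.
*/
#undef __FUNCT__
#define __FUNCT__ "MatMPISBAIJUsageSketch_Load"
static PetscErrorCode MatMPISBAIJUsageSketch_Load(MPI_Comm comm,Mat *newA)
{
  PetscErrorCode ierr;
  PetscViewer    viewer;

  PetscFunctionBegin;
  ierr = PetscViewerBinaryOpen(comm,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
  ierr = MatLoad(viewer,MATMPISBAIJ,newA);CHKERRQ(ierr);   /* dispatches to MatLoad_MPISBAIJ() */
  ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}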

#undef __FUNCT__
#define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
/*XXXXX@
   MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.

   Input Parameters:
.  mat  - the matrix
.  fact - factor

   Collective on Mat

   Level: advanced

   Notes:
   This can also be set by the command line option: -mat_use_hash_table fact

.keywords: matrix, hashtable, factor, HT

.seealso: MatSetOption()
@XXXXX*/

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMax_MPISBAIJ"
PetscErrorCode MatGetRowMax_MPISBAIJ(Mat A,Vec v)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
  PetscReal      atmp;
  PetscReal      *work,*svalues,*rvalues;
  PetscErrorCode ierr;
  PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
  PetscMPIInt    rank,size;
  PetscInt       *rowners_bs,dest,count,source;
  PetscScalar    *va;
  MatScalar      *ba;
  MPI_Status     stat;

  PetscFunctionBegin;
  ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);

  ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);

  bs  = A->bs;
  mbs = a->mbs;
  Mbs = a->Mbs;
  ba  = b->a;
  bi  = b->i;
  bj  = b->j;

  /* find ownerships */
  rowners_bs = a->rowners_bs;

  /* each proc creates an array to be distributed */
  ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr);
  ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);

  /* row_max for B */
  if (rank != size-1) {
    for (i=0; i<mbs; i++) {
      ncols = bi[1] - bi[0]; bi++;
      brow  = bs*i;
      for (j=0; j<ncols; j++) {
        bcol = bs*(*bj);
        for (kcol=0; kcol<bs; kcol++) {
          col  = bcol + kcol;          /* local col index */
          col += rowners_bs[rank+1];   /* global col index */
          for (krow=0; krow<bs; krow++) {
            atmp = PetscAbsScalar(*ba); ba++;
            row  = brow + krow;        /* local row index */
            if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
            if (work[col] < atmp) work[col] = atmp;
          }
        }
        bj++;
      }
    }

    /* send values to their owners */
    for (dest=rank+1; dest<size; dest++) {
      svalues = work + rowners_bs[dest];
      count   = rowners_bs[dest+1] - rowners_bs[dest];
      ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,A->comm);CHKERRQ(ierr);
    }
  }

  /* receive values */
  if (rank) {
    rvalues = work;
    count   = rowners_bs[rank+1] - rowners_bs[rank];
    for (source=0; source<rank; source++) {
      ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,A->comm,&stat);CHKERRQ(ierr);
      /* process values */
      for (i=0; i<count; i++) {
        if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
      }
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = PetscFree(work);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRelax_MPISBAIJ"
PetscErrorCode MatRelax_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
  PetscErrorCode ierr;
  PetscInt       mbs=mat->mbs,bs=matin->bs;
  PetscScalar    *x,*b,*ptr,zero=0.0;
  Vec            bb1;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
  if (bs > 1) SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
    while (its--) {
      /* lower triangular part: slvec0b = - B^T*xx */
      ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);

      /* copy xx into slvec0a */
      ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
      ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
      ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);

      ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);

      /* copy bb into slvec1a */
      ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
      ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
      ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);

      /* set slvec1b = 0 */
      ierr = VecSet(mat->slvec1b,zero);CHKERRQ(ierr);

      ierr = VecScatterBegin(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
      ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
      ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);

      /* upper triangular part: bb1 = slvec1a + B*slvec1b */
      ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);

      /* local diagonal sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
    ierr = VecDestroy(bb1);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRelax_MPISBAIJ_2comm"
PetscErrorCode MatRelax_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            lvec1,bb1;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
  if (matin->bs > 1) SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* lower diagonal part: bb1 = bb - B^T*xx */
      ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
      ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);

      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
      ierr = VecScatterBegin(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);

      /* upper diagonal part: bb1 = bb1 - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);

      ierr = VecScatterEnd(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);

      /* diagonal sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
    ierr = VecDestroy(lvec1);CHKERRQ(ierr);
    ierr = VecDestroy(bb1);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
  }
  PetscFunctionReturn(0);
}
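
/*
   Illustrative relaxation sketch added for exposition; it is not part of the
   original PETSc source.  It performs a few SSOR sweeps with the routines above
   through the public MatRelax() interface (assuming the MatRelax() name and
   argument order of this PETSc release).  As the implementations above show,
   only SOR_LOCAL_SYMMETRIC_SWEEP is supported for MPISBAIJ, and only for block
   size 1.  The iteration counts and omega below are hypothetical.
*/
#undef __FUNCT__
#define __FUNCT__ "MatMPISBAIJUsageSketch_SSOR"
static PetscErrorCode MatMPISBAIJUsageSketch_SSOR(Mat A,Vec b,Vec x)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* start from x = 0 and apply 5 local symmetric (SSOR) sweeps with omega = 1.0 */
  ierr = MatRelax(A,b,1.0,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),0.0,5,1,x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}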