#define PETSCMAT_DLL

#include "src/mat/impls/baij/mpi/mpibaij.h"   /*I "petscmat.h" I*/
#include "mpisbaij.h"
#include "src/mat/impls/sbaij/seq/sbaij.h"

EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat);
EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat);
EXTERN PetscErrorCode DisAssemble_MPISBAIJ(Mat);
EXTERN PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt);
EXTERN PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
EXTERN PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
EXTERN PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
EXTERN PetscErrorCode MatPrintHelp_SeqSBAIJ(Mat);
EXTERN PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*);
EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *);
EXTERN PetscErrorCode MatGetRowMax_MPISBAIJ(Mat,Vec);
EXTERN PetscErrorCode MatRelax_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec);

/* UGLY, ugly, ugly
   When MatScalar == PetscScalar the function MatSetValuesBlocked_MPISBAIJ_MatScalar() does
   not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and
   inserts them into the single precision data structure. The function MatSetValuesBlocked_MPISBAIJ()
   converts the entries into single precision and then calls ..._MatScalar() to put them
   into the single precision data structures.
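
   As a rough sketch of that wrapper pattern (the actual code is in MatSetValues_MPISBAIJ
   below, guarded by PETSC_USE_MAT_SINGLE): grow the persistent MatScalar buffer
   b->setvaluescopy when it is too small, copy the incoming PetscScalar values into it,
   and forward the call, e.g.

     for (i=0; i<N; i++) b->setvaluescopy[i] = v[i];
     ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,b->setvaluescopy,addv);CHKERRQ(ierr);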
30 */ 31 #if defined(PETSC_USE_MAT_SINGLE) 32 EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode); 33 EXTERN PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode); 34 EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode); 35 EXTERN PetscErrorCode MatSetValues_MPISBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode); 36 EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode); 37 #else 38 #define MatSetValuesBlocked_SeqSBAIJ_MatScalar MatSetValuesBlocked_SeqSBAIJ 39 #define MatSetValues_MPISBAIJ_MatScalar MatSetValues_MPISBAIJ 40 #define MatSetValuesBlocked_MPISBAIJ_MatScalar MatSetValuesBlocked_MPISBAIJ 41 #define MatSetValues_MPISBAIJ_HT_MatScalar MatSetValues_MPISBAIJ_HT 42 #define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar MatSetValuesBlocked_MPISBAIJ_HT 43 #endif 44 45 EXTERN_C_BEGIN 46 #undef __FUNCT__ 47 #define __FUNCT__ "MatStoreValues_MPISBAIJ" 48 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPISBAIJ(Mat mat) 49 { 50 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 51 PetscErrorCode ierr; 52 53 PetscFunctionBegin; 54 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 55 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 56 PetscFunctionReturn(0); 57 } 58 EXTERN_C_END 59 60 EXTERN_C_BEGIN 61 #undef __FUNCT__ 62 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ" 63 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPISBAIJ(Mat mat) 64 { 65 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 66 PetscErrorCode ierr; 67 68 PetscFunctionBegin; 69 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 70 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 71 PetscFunctionReturn(0); 72 } 73 EXTERN_C_END 74 75 76 #define CHUNKSIZE 10 77 78 #define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \ 79 { \ 80 \ 81 brow = row/bs; \ 82 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 83 rmax = aimax[brow]; nrow = ailen[brow]; \ 84 bcol = col/bs; \ 85 ridx = row % bs; cidx = col % bs; \ 86 low = 0; high = nrow; \ 87 while (high-low > 3) { \ 88 t = (low+high)/2; \ 89 if (rp[t] > bcol) high = t; \ 90 else low = t; \ 91 } \ 92 for (_i=low; _i<high; _i++) { \ 93 if (rp[_i] > bcol) break; \ 94 if (rp[_i] == bcol) { \ 95 bap = ap + bs2*_i + bs*cidx + ridx; \ 96 if (addv == ADD_VALUES) *bap += value; \ 97 else *bap = value; \ 98 goto a_noinsert; \ 99 } \ 100 } \ 101 if (a->nonew == 1) goto a_noinsert; \ 102 if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 103 MatSeqXAIJReallocateAIJ(a,bs2,nrow,brow,bcol,rmax,aa,ai,aj,a->mbs,rp,ap,aimax,a->nonew); \ 104 N = nrow++ - 1; \ 105 /* shift up all the later entries in this row */ \ 106 for (ii=N; ii>=_i; ii--) { \ 107 rp[ii+1] = rp[ii]; \ 108 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 109 } \ 110 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 111 rp[_i] = bcol; \ 112 ap[bs2*_i + bs*cidx + ridx] = value; \ 113 a_noinsert:; \ 114 ailen[brow] = nrow; \ 115 } 116 #ifndef MatSetValues_SeqBAIJ_B_Private 117 #define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \ 118 { \ 119 brow = row/bs; \ 120 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 
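  /* rp, ap now point at the block column indices and the values of block row brow of B */ \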
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else low = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else *bap = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(b,bs2,nrow,brow,bcol,rmax,ba,bi,bj,b->mbs,rp,ap,bimax,b->nonew); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \
    rp[_i] = bcol; \
    ap[bs2*_i + bs*cidx + ridx] = value; \
    b_noinsert:; \
    bilen[brow] = nrow; \
  }
#endif

#if defined(PETSC_USE_MAT_SINGLE)
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPISBAIJ"
PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,N = m*n;
  MatScalar      *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen = N;
  }
  vsingle = b->setvaluescopy;

  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,N = m*n*b->bs2;
  MatScalar      *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen = N;
  }
  vsingle = b->setvaluescopy;
  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif

/* Only add/insert a(i,j) with i<=j (blocks).
   Any a(i,j) with i>j input by the user is ignored.
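   This is what keeps the storage symmetric: only the upper triangle (by blocks) is held,
   so a lower-triangular entry would duplicate information already stored at its transposed
   location (see the im[i]/bs > in[j]/bs tests below).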
*/
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPISBAIJ_MatScalar"
PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  MatScalar      value;
  PetscTruth     roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=baij->rstart_bs;
  PetscInt       rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
  PetscInt       cend_orig=baij->cend_bs,bs=mat->bs;

  /* Some Variables required in the macro */
  Mat          A = baij->A;
  Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data;
  PetscInt     *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar    *aa=a->a;

  Mat         B = baij->B;
  Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data;
  PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba=b->a;

  PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar *ap,*bap;

  /* for stash */
  PetscInt  n_loc, *in_loc=0;
  MatScalar *v_loc=0;

  PetscFunctionBegin;

  if (!baij->donotstash) {
    ierr = PetscMalloc(n*sizeof(PetscInt),&in_loc);CHKERRQ(ierr);
    ierr = PetscMalloc(n*sizeof(MatScalar),&v_loc);CHKERRQ(ierr);
  }

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
      row = im[i] - rstart_orig;                     /* local row index */
      for (j=0; j<n; j++) {
        if (im[i]/bs > in[j]/bs) continue;           /* ignore lower triangular blocks */
        if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */
          col  = in[j] - cstart_orig;                /* local col index */
          brow = row/bs; bcol = col/bs;
          if (brow > bcol) continue;                 /* ignore lower triangular blocks of A */
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);}
#endif
        else { /* off-diag entry (B) */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
              B  = baij->B;
              b  = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba = b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else { /* off processor entry */
      if (!baij->donotstash) {
        n_loc = 0;
        for (j=0; j<n; j++) {
          if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
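          /* pack the surviving (upper triangular) column indices and values into the
             temporary buffers that are handed to the stash below */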
in_loc[n_loc] = in[j]; 299 if (roworiented) { 300 v_loc[n_loc] = v[i*n+j]; 301 } else { 302 v_loc[n_loc] = v[j*m+i]; 303 } 304 n_loc++; 305 } 306 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr); 307 } 308 } 309 } 310 311 if(!baij->donotstash){ 312 ierr = PetscFree(in_loc);CHKERRQ(ierr); 313 ierr = PetscFree(v_loc);CHKERRQ(ierr); 314 } 315 PetscFunctionReturn(0); 316 } 317 318 #undef __FUNCT__ 319 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_MatScalar" 320 PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 321 { 322 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 323 const MatScalar *value; 324 MatScalar *barray=baij->barray; 325 PetscTruth roworiented = baij->roworiented; 326 PetscErrorCode ierr; 327 PetscInt i,j,ii,jj,row,col,rstart=baij->rstart; 328 PetscInt rend=baij->rend,cstart=baij->cstart,stepval; 329 PetscInt cend=baij->cend,bs=mat->bs,bs2=baij->bs2; 330 331 PetscFunctionBegin; 332 if(!barray) { 333 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 334 baij->barray = barray; 335 } 336 337 if (roworiented) { 338 stepval = (n-1)*bs; 339 } else { 340 stepval = (m-1)*bs; 341 } 342 for (i=0; i<m; i++) { 343 if (im[i] < 0) continue; 344 #if defined(PETSC_USE_DEBUG) 345 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 346 #endif 347 if (im[i] >= rstart && im[i] < rend) { 348 row = im[i] - rstart; 349 for (j=0; j<n; j++) { 350 /* If NumCol = 1 then a copy is not required */ 351 if ((roworiented) && (n == 1)) { 352 barray = (MatScalar*) v + i*bs2; 353 } else if((!roworiented) && (m == 1)) { 354 barray = (MatScalar*) v + j*bs2; 355 } else { /* Here a copy is required */ 356 if (roworiented) { 357 value = v + i*(stepval+bs)*bs + j*bs; 358 } else { 359 value = v + j*(stepval+bs)*bs + i*bs; 360 } 361 for (ii=0; ii<bs; ii++,value+=stepval) { 362 for (jj=0; jj<bs; jj++) { 363 *barray++ = *value++; 364 } 365 } 366 barray -=bs2; 367 } 368 369 if (in[j] >= cstart && in[j] < cend){ 370 col = in[j] - cstart; 371 ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 372 } 373 else if (in[j] < 0) continue; 374 #if defined(PETSC_USE_DEBUG) 375 else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);} 376 #endif 377 else { 378 if (mat->was_assembled) { 379 if (!baij->colmap) { 380 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 381 } 382 383 #if defined(PETSC_USE_DEBUG) 384 #if defined (PETSC_USE_CTABLE) 385 { PetscInt data; 386 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 387 if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 388 } 389 #else 390 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 391 #endif 392 #endif 393 #if defined (PETSC_USE_CTABLE) 394 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 395 col = (col - 1)/bs; 396 #else 397 col = (baij->colmap[in[j]] - 1)/bs; 398 #endif 399 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 400 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 401 col = in[j]; 402 } 403 } 404 else col = in[j]; 405 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 406 } 407 } 408 } else { 409 if (!baij->donotstash) { 410 if (roworiented) { 411 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 412 } 
else { 413 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 414 } 415 } 416 } 417 } 418 PetscFunctionReturn(0); 419 } 420 421 #undef __FUNCT__ 422 #define __FUNCT__ "MatGetValues_MPISBAIJ" 423 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 424 { 425 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 426 PetscErrorCode ierr; 427 PetscInt bs=mat->bs,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs; 428 PetscInt bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data; 429 430 PetscFunctionBegin; 431 for (i=0; i<m; i++) { 432 if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); 433 if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1); 434 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 435 row = idxm[i] - bsrstart; 436 for (j=0; j<n; j++) { 437 if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); 438 if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1); 439 if (idxn[j] >= bscstart && idxn[j] < bscend){ 440 col = idxn[j] - bscstart; 441 ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 442 } else { 443 if (!baij->colmap) { 444 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 445 } 446 #if defined (PETSC_USE_CTABLE) 447 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 448 data --; 449 #else 450 data = baij->colmap[idxn[j]/bs]-1; 451 #endif 452 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 453 else { 454 col = data + idxn[j]%bs; 455 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 456 } 457 } 458 } 459 } else { 460 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 461 } 462 } 463 PetscFunctionReturn(0); 464 } 465 466 #undef __FUNCT__ 467 #define __FUNCT__ "MatNorm_MPISBAIJ" 468 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm) 469 { 470 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 471 PetscErrorCode ierr; 472 PetscReal sum[2],*lnorm2; 473 474 PetscFunctionBegin; 475 if (baij->size == 1) { 476 ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr); 477 } else { 478 if (type == NORM_FROBENIUS) { 479 ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr); 480 ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr); 481 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++; /* squar power of norm(A) */ 482 ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr); 483 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--; /* squar power of norm(B) */ 484 ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 485 *norm = sqrt(sum[0] + 2*sum[1]); 486 ierr = PetscFree(lnorm2);CHKERRQ(ierr); 487 } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */ 488 Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data; 489 Mat_SeqBAIJ *bmat=(Mat_SeqBAIJ*)baij->B->data; 490 PetscReal *rsum,*rsum2,vabs; 491 PetscInt *jj,*garray=baij->garray,rstart=baij->rstart,nz; 492 PetscInt brow,bcol,col,bs=baij->A->bs,row,grow,gcol,mbs=amat->mbs; 493 MatScalar *v; 494 495 ierr = PetscMalloc((2*mat->N+1)*sizeof(PetscReal),&rsum);CHKERRQ(ierr); 496 rsum2 = rsum + mat->N; 497 ierr = PetscMemzero(rsum,mat->N*sizeof(PetscReal));CHKERRQ(ierr); 498 /* Amat */ 499 v = amat->a; jj = amat->j; 500 for (brow=0; brow<mbs; brow++) { 501 grow = bs*(rstart + brow); 502 nz = amat->i[brow+1] - amat->i[brow]; 503 for 
(bcol=0; bcol<nz; bcol++){ 504 gcol = bs*(rstart + *jj); jj++; 505 for (col=0; col<bs; col++){ 506 for (row=0; row<bs; row++){ 507 vabs = PetscAbsScalar(*v); v++; 508 rsum[gcol+col] += vabs; 509 /* non-diagonal block */ 510 if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs; 511 } 512 } 513 } 514 } 515 /* Bmat */ 516 v = bmat->a; jj = bmat->j; 517 for (brow=0; brow<mbs; brow++) { 518 grow = bs*(rstart + brow); 519 nz = bmat->i[brow+1] - bmat->i[brow]; 520 for (bcol=0; bcol<nz; bcol++){ 521 gcol = bs*garray[*jj]; jj++; 522 for (col=0; col<bs; col++){ 523 for (row=0; row<bs; row++){ 524 vabs = PetscAbsScalar(*v); v++; 525 rsum[gcol+col] += vabs; 526 rsum[grow+row] += vabs; 527 } 528 } 529 } 530 } 531 ierr = MPI_Allreduce(rsum,rsum2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 532 *norm = 0.0; 533 for (col=0; col<mat->N; col++) { 534 if (rsum2[col] > *norm) *norm = rsum2[col]; 535 } 536 ierr = PetscFree(rsum);CHKERRQ(ierr); 537 } else { 538 SETERRQ(PETSC_ERR_SUP,"No support for this norm yet"); 539 } 540 } 541 PetscFunctionReturn(0); 542 } 543 544 #undef __FUNCT__ 545 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ" 546 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode) 547 { 548 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 549 PetscErrorCode ierr; 550 PetscInt nstash,reallocs; 551 InsertMode addv; 552 553 PetscFunctionBegin; 554 if (baij->donotstash) { 555 PetscFunctionReturn(0); 556 } 557 558 /* make sure all processors are either in INSERTMODE or ADDMODE */ 559 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 560 if (addv == (ADD_VALUES|INSERT_VALUES)) { 561 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 562 } 563 mat->insertmode = addv; /* in case this processor had no cache */ 564 565 ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr); 566 ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr); 567 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 568 ierr = PetscLogInfo((0,"MatAssemblyBegin_MPISBAIJ:Stash has %D entries,uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr); 569 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 570 ierr = PetscLogInfo((0,"MatAssemblyBegin_MPISBAIJ:Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr); 571 PetscFunctionReturn(0); 572 } 573 574 #undef __FUNCT__ 575 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ" 576 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode) 577 { 578 Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data; 579 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)baij->A->data; 580 Mat_SeqBAIJ *b=(Mat_SeqBAIJ*)baij->B->data; 581 PetscErrorCode ierr; 582 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 583 PetscInt *row,*col,other_disassembled; 584 PetscMPIInt n; 585 PetscTruth r1,r2,r3; 586 MatScalar *val; 587 InsertMode addv = mat->insertmode; 588 589 PetscFunctionBegin; 590 591 if (!baij->donotstash) { 592 while (1) { 593 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 594 if (!flg) break; 595 596 for (i=0; i<n;) { 597 /* Now identify the consecutive vals belonging to the same row */ 598 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 599 if (j < n) ncols = j-i; 600 else ncols = n-i; 601 /* Now assemble all these values with a single function call */ 602 ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 603 i = j; 604 } 605 } 606 ierr = 
MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 607 /* Now process the block-stash. Since the values are stashed column-oriented, 608 set the roworiented flag to column oriented, and after MatSetValues() 609 restore the original flags */ 610 r1 = baij->roworiented; 611 r2 = a->roworiented; 612 r3 = b->roworiented; 613 baij->roworiented = PETSC_FALSE; 614 a->roworiented = PETSC_FALSE; 615 b->roworiented = PETSC_FALSE; 616 while (1) { 617 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 618 if (!flg) break; 619 620 for (i=0; i<n;) { 621 /* Now identify the consecutive vals belonging to the same row */ 622 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 623 if (j < n) ncols = j-i; 624 else ncols = n-i; 625 ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 626 i = j; 627 } 628 } 629 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 630 baij->roworiented = r1; 631 a->roworiented = r2; 632 b->roworiented = r3; 633 } 634 635 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 636 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 637 638 /* determine if any processor has disassembled, if so we must 639 also disassemble ourselfs, in order that we may reassemble. */ 640 /* 641 if nonzero structure of submatrix B cannot change then we know that 642 no processor disassembled thus we can skip this stuff 643 */ 644 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 645 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 646 if (mat->was_assembled && !other_disassembled) { 647 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 648 } 649 } 650 651 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 652 ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */ 653 } 654 b->compressedrow.use = PETSC_TRUE; 655 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 656 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 657 658 if (baij->rowvalues) { 659 ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr); 660 baij->rowvalues = 0; 661 } 662 663 PetscFunctionReturn(0); 664 } 665 666 #undef __FUNCT__ 667 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket" 668 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 669 { 670 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 671 PetscErrorCode ierr; 672 PetscInt bs = mat->bs; 673 PetscMPIInt size = baij->size,rank = baij->rank; 674 PetscTruth iascii,isdraw; 675 PetscViewer sviewer; 676 PetscViewerFormat format; 677 678 PetscFunctionBegin; 679 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 680 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 681 if (iascii) { 682 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 683 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 684 MatInfo info; 685 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 686 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 687 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n", 688 rank,mat->m,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs, 689 mat->bs,(PetscInt)info.memory);CHKERRQ(ierr); 690 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 691 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr); 692 ierr = 
MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 693 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr); 694 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 695 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 696 PetscFunctionReturn(0); 697 } else if (format == PETSC_VIEWER_ASCII_INFO) { 698 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 699 PetscFunctionReturn(0); 700 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 701 PetscFunctionReturn(0); 702 } 703 } 704 705 if (isdraw) { 706 PetscDraw draw; 707 PetscTruth isnull; 708 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 709 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 710 } 711 712 if (size == 1) { 713 ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr); 714 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 715 } else { 716 /* assemble the entire matrix onto first processor. */ 717 Mat A; 718 Mat_SeqSBAIJ *Aloc; 719 Mat_SeqBAIJ *Bloc; 720 PetscInt M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 721 MatScalar *a; 722 723 /* Should this be the same type as mat? */ 724 ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr); 725 if (!rank) { 726 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 727 } else { 728 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 729 } 730 ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 731 ierr = MatMPISBAIJSetPreallocation(A,mat->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 732 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 733 734 /* copy over the A part */ 735 Aloc = (Mat_SeqSBAIJ*)baij->A->data; 736 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 737 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 738 739 for (i=0; i<mbs; i++) { 740 rvals[0] = bs*(baij->rstart + i); 741 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 742 for (j=ai[i]; j<ai[i+1]; j++) { 743 col = (baij->cstart+aj[j])*bs; 744 for (k=0; k<bs; k++) { 745 ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 746 col++; a += bs; 747 } 748 } 749 } 750 /* copy over the B part */ 751 Bloc = (Mat_SeqBAIJ*)baij->B->data; 752 ai = Bloc->i; aj = Bloc->j; a = Bloc->a; 753 for (i=0; i<mbs; i++) { 754 rvals[0] = bs*(baij->rstart + i); 755 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 756 for (j=ai[i]; j<ai[i+1]; j++) { 757 col = baij->garray[aj[j]]*bs; 758 for (k=0; k<bs; k++) { 759 ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 760 col++; a += bs; 761 } 762 } 763 } 764 ierr = PetscFree(rvals);CHKERRQ(ierr); 765 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 766 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 767 /* 768 Everyone has to call to draw the matrix since the graphics waits are 769 synchronized across all processors that share the PetscDraw object 770 */ 771 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 772 if (!rank) { 773 ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 774 ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 775 } 776 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 777 ierr = MatDestroy(A);CHKERRQ(ierr); 778 } 779 PetscFunctionReturn(0); 780 } 781 782 #undef __FUNCT__ 783 #define __FUNCT__ "MatView_MPISBAIJ" 784 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer) 785 { 786 PetscErrorCode ierr; 787 PetscTruth 
iascii,isdraw,issocket,isbinary; 788 789 PetscFunctionBegin; 790 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 791 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 792 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 793 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 794 if (iascii || isdraw || issocket || isbinary) { 795 ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 796 } else { 797 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name); 798 } 799 PetscFunctionReturn(0); 800 } 801 802 #undef __FUNCT__ 803 #define __FUNCT__ "MatDestroy_MPISBAIJ" 804 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat) 805 { 806 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 807 PetscErrorCode ierr; 808 809 PetscFunctionBegin; 810 #if defined(PETSC_USE_LOG) 811 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->M,mat->N); 812 #endif 813 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 814 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 815 ierr = PetscFree(baij->rowners);CHKERRQ(ierr); 816 ierr = MatDestroy(baij->A);CHKERRQ(ierr); 817 ierr = MatDestroy(baij->B);CHKERRQ(ierr); 818 #if defined (PETSC_USE_CTABLE) 819 if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);} 820 #else 821 if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);} 822 #endif 823 if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);} 824 if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 825 if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 826 if (baij->slvec0) { 827 ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr); 828 ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr); 829 } 830 if (baij->slvec1) { 831 ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr); 832 ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr); 833 ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr); 834 } 835 if (baij->sMvctx) {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);} 836 if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);} 837 if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);} 838 if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);} 839 #if defined(PETSC_USE_MAT_SINGLE) 840 if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);} 841 #endif 842 ierr = PetscFree(baij);CHKERRQ(ierr); 843 844 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 845 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 846 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 847 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 848 PetscFunctionReturn(0); 849 } 850 851 #undef __FUNCT__ 852 #define __FUNCT__ "MatMult_MPISBAIJ" 853 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy) 854 { 855 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 856 PetscErrorCode ierr; 857 PetscInt nt,mbs=a->mbs,bs=A->bs; 858 PetscScalar *x,*from,zero=0.0; 859 860 PetscFunctionBegin; 861 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 862 if (nt != A->n) { 863 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 864 } 865 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 866 if (nt != A->m) { 867 
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  }

  /* diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);

  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ_2comm"
PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  }
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->m) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  }

  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  /* do diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  /* do superdiagonal part */
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  /* do subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPISBAIJ"
PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       mbs=a->mbs,bs=A->bs;
  PetscScalar    *x,*from,zero=0.0;

  PetscFunctionBegin;
  /*
  PetscSynchronizedPrintf(A->comm," MatMultAdd is called ...\n");
  PetscSynchronizedFlush(A->comm);
  */
  /* diagonal part */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);

  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);

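  /* zz = slvec1a + B*slvec1b: the diagonal-block product plus y, combined with the
     off-diagonal contributions gathered through the sMvctx scatter */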
PetscFunctionReturn(0); 960 } 961 962 #undef __FUNCT__ 963 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm" 964 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz) 965 { 966 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 967 PetscErrorCode ierr; 968 969 PetscFunctionBegin; 970 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 971 /* do diagonal part */ 972 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 973 /* do supperdiagonal part */ 974 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 975 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 976 977 /* do subdiagonal part */ 978 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 979 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 980 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 981 982 PetscFunctionReturn(0); 983 } 984 985 /* 986 This only works correctly for square matrices where the subblock A->A is the 987 diagonal block 988 */ 989 #undef __FUNCT__ 990 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ" 991 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v) 992 { 993 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 994 PetscErrorCode ierr; 995 996 PetscFunctionBegin; 997 /* if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */ 998 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 999 PetscFunctionReturn(0); 1000 } 1001 1002 #undef __FUNCT__ 1003 #define __FUNCT__ "MatScale_MPISBAIJ" 1004 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa) 1005 { 1006 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1007 PetscErrorCode ierr; 1008 1009 PetscFunctionBegin; 1010 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1011 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1012 PetscFunctionReturn(0); 1013 } 1014 1015 #undef __FUNCT__ 1016 #define __FUNCT__ "MatGetRow_MPISBAIJ" 1017 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1018 { 1019 PetscFunctionBegin; 1020 if (matin) SETERRQ(PETSC_ERR_SUP,"MatGetRow is not supported for SBAIJ matrix format"); 1021 PetscFunctionReturn(0); 1022 } 1023 1024 #undef __FUNCT__ 1025 #define __FUNCT__ "MatRestoreRow_MPISBAIJ" 1026 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1027 { 1028 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1029 1030 PetscFunctionBegin; 1031 if (!baij->getrowactive) { 1032 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1033 } 1034 baij->getrowactive = PETSC_FALSE; 1035 PetscFunctionReturn(0); 1036 } 1037 1038 #undef __FUNCT__ 1039 #define __FUNCT__ "MatRealPart_MPISBAIJ" 1040 PetscErrorCode MatRealPart_MPISBAIJ(Mat A) 1041 { 1042 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1043 PetscErrorCode ierr; 1044 1045 PetscFunctionBegin; 1046 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1047 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1048 PetscFunctionReturn(0); 1049 } 1050 1051 #undef __FUNCT__ 1052 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ" 1053 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A) 1054 { 1055 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1056 PetscErrorCode ierr; 1057 1058 PetscFunctionBegin; 1059 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1060 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1061 PetscFunctionReturn(0); 1062 } 1063 1064 #undef __FUNCT__ 1065 #define __FUNCT__ "MatZeroEntries_MPISBAIJ" 1066 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A) 1067 { 
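  /* zero both the local diagonal block (A) and the local off-diagonal block (B) */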
1068 Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data; 1069 PetscErrorCode ierr; 1070 1071 PetscFunctionBegin; 1072 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1073 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1074 PetscFunctionReturn(0); 1075 } 1076 1077 #undef __FUNCT__ 1078 #define __FUNCT__ "MatGetInfo_MPISBAIJ" 1079 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1080 { 1081 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data; 1082 Mat A = a->A,B = a->B; 1083 PetscErrorCode ierr; 1084 PetscReal isend[5],irecv[5]; 1085 1086 PetscFunctionBegin; 1087 info->block_size = (PetscReal)matin->bs; 1088 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1089 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1090 isend[3] = info->memory; isend[4] = info->mallocs; 1091 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1092 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1093 isend[3] += info->memory; isend[4] += info->mallocs; 1094 if (flag == MAT_LOCAL) { 1095 info->nz_used = isend[0]; 1096 info->nz_allocated = isend[1]; 1097 info->nz_unneeded = isend[2]; 1098 info->memory = isend[3]; 1099 info->mallocs = isend[4]; 1100 } else if (flag == MAT_GLOBAL_MAX) { 1101 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1102 info->nz_used = irecv[0]; 1103 info->nz_allocated = irecv[1]; 1104 info->nz_unneeded = irecv[2]; 1105 info->memory = irecv[3]; 1106 info->mallocs = irecv[4]; 1107 } else if (flag == MAT_GLOBAL_SUM) { 1108 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1109 info->nz_used = irecv[0]; 1110 info->nz_allocated = irecv[1]; 1111 info->nz_unneeded = irecv[2]; 1112 info->memory = irecv[3]; 1113 info->mallocs = irecv[4]; 1114 } else { 1115 SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1116 } 1117 info->rows_global = (PetscReal)A->M; 1118 info->columns_global = (PetscReal)A->N; 1119 info->rows_local = (PetscReal)A->m; 1120 info->columns_local = (PetscReal)A->N; 1121 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1122 info->fill_ratio_needed = 0; 1123 info->factor_mallocs = 0; 1124 PetscFunctionReturn(0); 1125 } 1126 1127 #undef __FUNCT__ 1128 #define __FUNCT__ "MatSetOption_MPISBAIJ" 1129 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op) 1130 { 1131 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1132 PetscErrorCode ierr; 1133 1134 PetscFunctionBegin; 1135 switch (op) { 1136 case MAT_NO_NEW_NONZERO_LOCATIONS: 1137 case MAT_YES_NEW_NONZERO_LOCATIONS: 1138 case MAT_COLUMNS_UNSORTED: 1139 case MAT_COLUMNS_SORTED: 1140 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1141 case MAT_KEEP_ZEROED_ROWS: 1142 case MAT_NEW_NONZERO_LOCATION_ERR: 1143 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1144 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1145 break; 1146 case MAT_ROW_ORIENTED: 1147 a->roworiented = PETSC_TRUE; 1148 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1149 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1150 break; 1151 case MAT_ROWS_SORTED: 1152 case MAT_ROWS_UNSORTED: 1153 case MAT_YES_NEW_DIAGONALS: 1154 ierr = PetscLogInfo((A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"));CHKERRQ(ierr); 1155 break; 1156 case MAT_COLUMN_ORIENTED: 1157 a->roworiented = PETSC_FALSE; 1158 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1159 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1160 break; 1161 case MAT_IGNORE_OFF_PROC_ENTRIES: 1162 a->donotstash = PETSC_TRUE; 1163 break; 1164 case MAT_NO_NEW_DIAGONALS: 1165 
SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1166 case MAT_USE_HASH_TABLE: 1167 a->ht_flag = PETSC_TRUE; 1168 break; 1169 case MAT_NOT_SYMMETRIC: 1170 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 1171 case MAT_HERMITIAN: 1172 SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric"); 1173 case MAT_SYMMETRIC: 1174 case MAT_STRUCTURALLY_SYMMETRIC: 1175 case MAT_NOT_HERMITIAN: 1176 case MAT_SYMMETRY_ETERNAL: 1177 case MAT_NOT_SYMMETRY_ETERNAL: 1178 break; 1179 default: 1180 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1181 } 1182 PetscFunctionReturn(0); 1183 } 1184 1185 #undef __FUNCT__ 1186 #define __FUNCT__ "MatTranspose_MPISBAIJ" 1187 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,Mat *B) 1188 { 1189 PetscErrorCode ierr; 1190 PetscFunctionBegin; 1191 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 1192 PetscFunctionReturn(0); 1193 } 1194 1195 #undef __FUNCT__ 1196 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ" 1197 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr) 1198 { 1199 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1200 Mat a=baij->A, b=baij->B; 1201 PetscErrorCode ierr; 1202 PetscInt nv,m,n; 1203 PetscTruth flg; 1204 1205 PetscFunctionBegin; 1206 if (ll != rr){ 1207 ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr); 1208 if (!flg) 1209 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n"); 1210 } 1211 if (!ll) PetscFunctionReturn(0); 1212 1213 ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr); 1214 if (m != n) SETERRQ2(PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n); 1215 1216 ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr); 1217 if (nv!=n) SETERRQ(PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size"); 1218 1219 ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1220 1221 /* left diagonalscale the off-diagonal part */ 1222 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1223 1224 /* scale the diagonal part */ 1225 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1226 1227 /* right diagonalscale the off-diagonal part */ 1228 ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1229 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1230 PetscFunctionReturn(0); 1231 } 1232 1233 #undef __FUNCT__ 1234 #define __FUNCT__ "MatPrintHelp_MPISBAIJ" 1235 PetscErrorCode MatPrintHelp_MPISBAIJ(Mat A) 1236 { 1237 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1238 MPI_Comm comm = A->comm; 1239 static PetscTruth called = PETSC_FALSE; 1240 PetscErrorCode ierr; 1241 1242 PetscFunctionBegin; 1243 if (!a->rank) { 1244 ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr); 1245 } 1246 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1247 ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr); 1248 ierr = (*PetscHelpPrintf)(comm," -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr); 1249 PetscFunctionReturn(0); 1250 } 1251 1252 #undef __FUNCT__ 1253 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ" 1254 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A) 1255 { 1256 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1257 PetscErrorCode ierr; 1258 1259 PetscFunctionBegin; 1260 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1261 PetscFunctionReturn(0); 1262 } 1263 1264 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *); 1265 1266 #undef __FUNCT__ 1267 #define __FUNCT__ "MatEqual_MPISBAIJ" 1268 
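/* Two MPISBAIJ matrices compare equal only when both the diagonal (A) and off-diagonal (B)
   local blocks match on every process; the per-process results are combined with a
   logical AND reduction. */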
PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag) 1269 { 1270 Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data; 1271 Mat a,b,c,d; 1272 PetscTruth flg; 1273 PetscErrorCode ierr; 1274 1275 PetscFunctionBegin; 1276 a = matA->A; b = matA->B; 1277 c = matB->A; d = matB->B; 1278 1279 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1280 if (flg) { 1281 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1282 } 1283 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1284 PetscFunctionReturn(0); 1285 } 1286 1287 #undef __FUNCT__ 1288 #define __FUNCT__ "MatCopy_MPISBAIJ" 1289 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str) 1290 { 1291 PetscErrorCode ierr; 1292 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1293 Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data; 1294 1295 PetscFunctionBegin; 1296 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1297 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1298 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1299 } else { 1300 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1301 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1302 } 1303 PetscFunctionReturn(0); 1304 } 1305 1306 #undef __FUNCT__ 1307 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ" 1308 PetscErrorCode MatSetUpPreallocation_MPISBAIJ(Mat A) 1309 { 1310 PetscErrorCode ierr; 1311 1312 PetscFunctionBegin; 1313 ierr = MatMPISBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1314 PetscFunctionReturn(0); 1315 } 1316 1317 #include "petscblaslapack.h" 1318 #undef __FUNCT__ 1319 #define __FUNCT__ "MatAXPY_MPISBAIJ" 1320 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1321 { 1322 PetscErrorCode ierr; 1323 Mat_MPISBAIJ *xx=(Mat_MPISBAIJ *)X->data,*yy=(Mat_MPISBAIJ *)Y->data; 1324 PetscBLASInt bnz,one=1; 1325 Mat_SeqSBAIJ *xa,*ya; 1326 Mat_SeqBAIJ *xb,*yb; 1327 1328 PetscFunctionBegin; 1329 if (str == SAME_NONZERO_PATTERN) { 1330 PetscScalar alpha = a; 1331 xa = (Mat_SeqSBAIJ *)xx->A->data; 1332 ya = (Mat_SeqSBAIJ *)yy->A->data; 1333 bnz = (PetscBLASInt)xa->nz; 1334 BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one); 1335 xb = (Mat_SeqBAIJ *)xx->B->data; 1336 yb = (Mat_SeqBAIJ *)yy->B->data; 1337 bnz = (PetscBLASInt)xb->nz; 1338 BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one); 1339 } else { 1340 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1341 } 1342 PetscFunctionReturn(0); 1343 } 1344 1345 #undef __FUNCT__ 1346 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ" 1347 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[]) 1348 { 1349 PetscErrorCode ierr; 1350 PetscInt i; 1351 PetscTruth flg; 1352 1353 PetscFunctionBegin; 1354 for (i=0; i<n; i++) { 1355 ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr); 1356 if (!flg) { 1357 SETERRQ(PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices"); 1358 } 1359 } 1360 ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr); 1361 PetscFunctionReturn(0); 1362 } 1363 1364 1365 /* -------------------------------------------------------------------*/ 1366 static struct _MatOps MatOps_Values = { 1367 MatSetValues_MPISBAIJ, 1368 MatGetRow_MPISBAIJ, 1369 MatRestoreRow_MPISBAIJ, 1370 MatMult_MPISBAIJ, 1371 /* 4*/ MatMultAdd_MPISBAIJ, 1372 MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */ 1373 MatMultAdd_MPISBAIJ, 1374 0, 1375 0, 1376 0, 1377 /*10*/ 0, 1378 0, 1379 0, 1380 MatRelax_MPISBAIJ, 1381 
MatTranspose_MPISBAIJ, 1382 /*15*/ MatGetInfo_MPISBAIJ, 1383 MatEqual_MPISBAIJ, 1384 MatGetDiagonal_MPISBAIJ, 1385 MatDiagonalScale_MPISBAIJ, 1386 MatNorm_MPISBAIJ, 1387 /*20*/ MatAssemblyBegin_MPISBAIJ, 1388 MatAssemblyEnd_MPISBAIJ, 1389 0, 1390 MatSetOption_MPISBAIJ, 1391 MatZeroEntries_MPISBAIJ, 1392 /*25*/ 0, 1393 0, 1394 0, 1395 0, 1396 0, 1397 /*30*/ MatSetUpPreallocation_MPISBAIJ, 1398 0, 1399 0, 1400 0, 1401 0, 1402 /*35*/ MatDuplicate_MPISBAIJ, 1403 0, 1404 0, 1405 0, 1406 0, 1407 /*40*/ MatAXPY_MPISBAIJ, 1408 MatGetSubMatrices_MPISBAIJ, 1409 MatIncreaseOverlap_MPISBAIJ, 1410 MatGetValues_MPISBAIJ, 1411 MatCopy_MPISBAIJ, 1412 /*45*/ MatPrintHelp_MPISBAIJ, 1413 MatScale_MPISBAIJ, 1414 0, 1415 0, 1416 0, 1417 /*50*/ 0, 1418 0, 1419 0, 1420 0, 1421 0, 1422 /*55*/ 0, 1423 0, 1424 MatSetUnfactored_MPISBAIJ, 1425 0, 1426 MatSetValuesBlocked_MPISBAIJ, 1427 /*60*/ 0, 1428 0, 1429 0, 1430 MatGetPetscMaps_Petsc, 1431 0, 1432 /*65*/ 0, 1433 0, 1434 0, 1435 0, 1436 0, 1437 /*70*/ MatGetRowMax_MPISBAIJ, 1438 0, 1439 0, 1440 0, 1441 0, 1442 /*75*/ 0, 1443 0, 1444 0, 1445 0, 1446 0, 1447 /*80*/ 0, 1448 0, 1449 0, 1450 0, 1451 MatLoad_MPISBAIJ, 1452 /*85*/ 0, 1453 0, 1454 0, 1455 0, 1456 0, 1457 /*90*/ 0, 1458 0, 1459 0, 1460 0, 1461 0, 1462 /*95*/ 0, 1463 0, 1464 0, 1465 0, 1466 0, 1467 /*100*/0, 1468 0, 1469 0, 1470 0, 1471 0, 1472 /*105*/0, 1473 MatRealPart_MPISBAIJ, 1474 MatImaginaryPart_MPISBAIJ 1475 }; 1476 1477 1478 EXTERN_C_BEGIN 1479 #undef __FUNCT__ 1480 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ" 1481 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 1482 { 1483 PetscFunctionBegin; 1484 *a = ((Mat_MPISBAIJ *)A->data)->A; 1485 *iscopy = PETSC_FALSE; 1486 PetscFunctionReturn(0); 1487 } 1488 EXTERN_C_END 1489 1490 EXTERN_C_BEGIN 1491 #undef __FUNCT__ 1492 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ" 1493 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz) 1494 { 1495 Mat_MPISBAIJ *b; 1496 PetscErrorCode ierr; 1497 PetscInt i,mbs,Mbs; 1498 1499 PetscFunctionBegin; 1500 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1501 1502 if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive"); 1503 if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3; 1504 if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1; 1505 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 1506 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 1507 if (d_nnz) { 1508 for (i=0; i<B->m/bs; i++) { 1509 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 1510 } 1511 } 1512 if (o_nnz) { 1513 for (i=0; i<B->m/bs; i++) { 1514 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 1515 } 1516 } 1517 B->preallocated = PETSC_TRUE; 1518 ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr); 1519 ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr); 1520 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1521 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->cmap);CHKERRQ(ierr); 1522 1523 b = (Mat_MPISBAIJ*)B->data; 1524 mbs = B->m/bs; 1525 Mbs = B->M/bs; 1526 if (mbs*bs != B->m) { 1527 SETERRQ2(PETSC_ERR_ARG_SIZ,"No 
of local rows %D must be divisible by blocksize %D",B->m,bs); 1528 } 1529 1530 B->bs = bs; 1531 b->bs2 = bs*bs; 1532 b->mbs = mbs; 1533 b->nbs = mbs; 1534 b->Mbs = Mbs; 1535 b->Nbs = Mbs; 1536 1537 ierr = MPI_Allgather(&b->mbs,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr); 1538 b->rowners[0] = 0; 1539 for (i=2; i<=b->size; i++) { 1540 b->rowners[i] += b->rowners[i-1]; 1541 } 1542 b->rstart = b->rowners[b->rank]; 1543 b->rend = b->rowners[b->rank+1]; 1544 b->cstart = b->rstart; 1545 b->cend = b->rend; 1546 for (i=0; i<=b->size; i++) { 1547 b->rowners_bs[i] = b->rowners[i]*bs; 1548 } 1549 b->rstart_bs = b-> rstart*bs; 1550 b->rend_bs = b->rend*bs; 1551 1552 b->cstart_bs = b->cstart*bs; 1553 b->cend_bs = b->cend*bs; 1554 1555 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 1556 ierr = MatSetSizes(b->A,B->m,B->m,B->m,B->m);CHKERRQ(ierr); 1557 ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr); 1558 ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 1559 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 1560 1561 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 1562 ierr = MatSetSizes(b->B,B->m,B->M,B->m,B->M);CHKERRQ(ierr); 1563 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 1564 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 1565 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 1566 1567 /* build cache for off array entries formed */ 1568 ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr); 1569 1570 PetscFunctionReturn(0); 1571 } 1572 EXTERN_C_END 1573 1574 /*MC 1575 MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices, 1576 based on block compressed sparse row format. Only the upper triangular portion of the matrix is stored. 1577 1578 Options Database Keys: 1579 . 
-mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions() 1580 1581 Level: beginner 1582 1583 .seealso: MatCreateMPISBAIJ 1584 M*/ 1585 1586 EXTERN_C_BEGIN 1587 #undef __FUNCT__ 1588 #define __FUNCT__ "MatCreate_MPISBAIJ" 1589 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPISBAIJ(Mat B) 1590 { 1591 Mat_MPISBAIJ *b; 1592 PetscErrorCode ierr; 1593 PetscTruth flg; 1594 1595 PetscFunctionBegin; 1596 1597 ierr = PetscNew(Mat_MPISBAIJ,&b);CHKERRQ(ierr); 1598 B->data = (void*)b; 1599 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1600 1601 B->ops->destroy = MatDestroy_MPISBAIJ; 1602 B->ops->view = MatView_MPISBAIJ; 1603 B->mapping = 0; 1604 B->factor = 0; 1605 B->assembled = PETSC_FALSE; 1606 1607 B->insertmode = NOT_SET_VALUES; 1608 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 1609 ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr); 1610 1611 /* build local table of row and column ownerships */ 1612 ierr = PetscMalloc(3*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr); 1613 b->cowners = b->rowners + b->size + 2; 1614 b->rowners_bs = b->cowners + b->size + 2; 1615 ierr = PetscLogObjectMemory(B,3*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ));CHKERRQ(ierr); 1616 1617 /* build cache for off array entries formed */ 1618 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 1619 b->donotstash = PETSC_FALSE; 1620 b->colmap = PETSC_NULL; 1621 b->garray = PETSC_NULL; 1622 b->roworiented = PETSC_TRUE; 1623 1624 #if defined(PETSC_USE_MAT_SINGLE) 1625 /* stuff for MatSetValues_XXX in single precision */ 1626 b->setvalueslen = 0; 1627 b->setvaluescopy = PETSC_NULL; 1628 #endif 1629 1630 /* stuff used in block assembly */ 1631 b->barray = 0; 1632 1633 /* stuff used for matrix vector multiply */ 1634 b->lvec = 0; 1635 b->Mvctx = 0; 1636 b->slvec0 = 0; 1637 b->slvec0b = 0; 1638 b->slvec1 = 0; 1639 b->slvec1a = 0; 1640 b->slvec1b = 0; 1641 b->sMvctx = 0; 1642 1643 /* stuff for MatGetRow() */ 1644 b->rowindices = 0; 1645 b->rowvalues = 0; 1646 b->getrowactive = PETSC_FALSE; 1647 1648 /* hash table stuff */ 1649 b->ht = 0; 1650 b->hd = 0; 1651 b->ht_size = 0; 1652 b->ht_flag = PETSC_FALSE; 1653 b->ht_fact = 0; 1654 b->ht_total_ct = 0; 1655 b->ht_insert_ct = 0; 1656 1657 ierr = PetscOptionsHasName(B->prefix,"-mat_use_hash_table",&flg);CHKERRQ(ierr); 1658 if (flg) { 1659 PetscReal fact = 1.39; 1660 ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr); 1661 ierr = PetscOptionsGetReal(B->prefix,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr); 1662 if (fact <= 1.0) fact = 1.39; 1663 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 1664 ierr = PetscLogInfo((0,"MatCreateMPISBAIJ:Hash table Factor used %5.2f\n",fact));CHKERRQ(ierr); 1665 } 1666 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1667 "MatStoreValues_MPISBAIJ", 1668 MatStoreValues_MPISBAIJ);CHKERRQ(ierr); 1669 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1670 "MatRetrieveValues_MPISBAIJ", 1671 MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr); 1672 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 1673 "MatGetDiagonalBlock_MPISBAIJ", 1674 MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr); 1675 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C", 1676 "MatMPISBAIJSetPreallocation_MPISBAIJ", 1677 MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr); 1678 B->symmetric = PETSC_TRUE; 1679 
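  /* only the upper triangular block part is stored, so the matrix is symmetric by construction */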
B->structurally_symmetric = PETSC_TRUE; 1680 B->symmetric_set = PETSC_TRUE; 1681 B->structurally_symmetric_set = PETSC_TRUE; 1682 PetscFunctionReturn(0); 1683 } 1684 EXTERN_C_END 1685 1686 /*MC 1687 MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices. 1688 1689 This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator, 1690 and MATMPISBAIJ otherwise. 1691 1692 Options Database Keys: 1693 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions() 1694 1695 Level: beginner 1696 1697 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ 1698 M*/ 1699 1700 EXTERN_C_BEGIN 1701 #undef __FUNCT__ 1702 #define __FUNCT__ "MatCreate_SBAIJ" 1703 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SBAIJ(Mat A) 1704 { 1705 PetscErrorCode ierr; 1706 PetscMPIInt size; 1707 1708 PetscFunctionBegin; 1709 ierr = PetscObjectChangeTypeName((PetscObject)A,MATSBAIJ);CHKERRQ(ierr); 1710 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 1711 if (size == 1) { 1712 ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr); 1713 } else { 1714 ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 1715 } 1716 PetscFunctionReturn(0); 1717 } 1718 EXTERN_C_END 1719 1720 #undef __FUNCT__ 1721 #define __FUNCT__ "MatMPISBAIJSetPreallocation" 1722 /*@C 1723 MatMPISBAIJSetPreallocation - For good matrix assembly performance 1724 the user should preallocate the matrix storage by setting the parameters 1725 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1726 performance can be increased by more than a factor of 50. 1727 1728 Collective on Mat 1729 1730 Input Parameters: 1731 + A - the matrix 1732 . bs - size of block 1733 . d_nz - number of block nonzeros per block row in diagonal portion of local 1734 submatrix (same for all local rows) 1735 . d_nnz - array containing the number of block nonzeros in the various block rows 1736 in the upper triangular and diagonal part of the diagonal portion of the local submatrix 1737 (possibly different for each block row) or PETSC_NULL. You must leave room 1738 for the diagonal entry even if it is zero. 1739 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1740 submatrix (same for all local rows). 1741 - o_nnz - array containing the number of nonzeros in the various block rows of the 1742 off-diagonal portion of the local submatrix (possibly different for 1743 each block row) or PETSC_NULL. 1744 1745 1746 Options Database Keys: 1747 . -mat_no_unroll - uses code that does not unroll the loops in the 1748 block calculations (much slower) 1749 . -mat_block_size - size of the blocks to use 1750 1751 Notes: 1752 1753 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1754 then it must be used on all processors that share the object for that argument. 1755 1756 If the *_nnz parameter is given then the *_nz parameter is ignored. 1757 1758 Storage Information: 1759 For a square global matrix we define each processor's diagonal portion 1760 to be its local rows and the corresponding columns (a square submatrix); 1761 each processor's off-diagonal portion encompasses the remainder of the 1762 local matrix (a rectangular submatrix). 1763 1764 The user can specify preallocated storage for the diagonal part of 1765 the local submatrix with either d_nz or d_nnz (not both). Set 1766 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1767 memory allocation.
Likewise, specify preallocated storage for the 1768 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1769 1770 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1771 the figure below we depict these three local rows and all columns (0-11). 1772 1773 .vb 1774 0 1 2 3 4 5 6 7 8 9 10 11 1775 ------------------- 1776 row 3 | o o o d d d o o o o o o 1777 row 4 | o o o d d d o o o o o o 1778 row 5 | o o o d d d o o o o o o 1779 ------------------- 1780 .ve 1781 1782 Thus, any entries in the d locations are stored in the d (diagonal) 1783 submatrix, and any entries in the o locations are stored in the 1784 o (off-diagonal) submatrix. Note that the d matrix is stored in 1785 MATSEQSBAIJ format and the o submatrix in MATSEQBAIJ format. 1786 1787 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 1788 plus the diagonal part of the d matrix, 1789 and o_nz should indicate the number of block nonzeros per row in the o matrix. 1790 In general, for PDE problems in which most nonzeros are near the diagonal, 1791 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1792 or you will get TERRIBLE performance; see the users' manual chapter on 1793 matrices. 1794 1795 Level: intermediate 1796 1797 .keywords: matrix, block, aij, compressed row, sparse, parallel 1798 1799 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1800 @*/ 1801 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 1802 { 1803 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 1804 1805 PetscFunctionBegin; 1806 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 1807 if (f) { 1808 ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 1809 } 1810 PetscFunctionReturn(0); 1811 } 1812 1813 #undef __FUNCT__ 1814 #define __FUNCT__ "MatCreateMPISBAIJ" 1815 /*@C 1816 MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format 1817 (block compressed row). For good matrix assembly performance 1818 the user should preallocate the matrix storage by setting the parameters 1819 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1820 performance can be increased by more than a factor of 50. 1821 1822 Collective on MPI_Comm 1823 1824 Input Parameters: 1825 + comm - MPI communicator 1826 . bs - size of block 1827 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1828 This value should be the same as the local size used in creating the 1829 y vector for the matrix-vector product y = Ax. 1830 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1831 This value should be the same as the local size used in creating the 1832 x vector for the matrix-vector product y = Ax. 1833 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 1834 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 1835 . d_nz - number of block nonzeros per block row in diagonal portion of local 1836 submatrix (same for all local rows) 1837 . d_nnz - array containing the number of block nonzeros in the various block rows 1838 in the upper triangular portion of the diagonal portion of the local submatrix 1839 (possibly different for each block row) or PETSC_NULL.
1840 You must leave room for the diagonal entry even if it is zero. 1841 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1842 submatrix (same for all local rows). 1843 - o_nnz - array containing the number of nonzeros in the various block rows of the 1844 off-diagonal portion of the local submatrix (possibly different for 1845 each block row) or PETSC_NULL. 1846 1847 Output Parameter: 1848 . A - the matrix 1849 1850 Options Database Keys: 1851 . -mat_no_unroll - uses code that does not unroll the loops in the 1852 block calculations (much slower) 1853 . -mat_block_size - size of the blocks to use 1854 . -mat_mpi - use the parallel matrix data structures even on one processor 1855 (defaults to using SeqSBAIJ format on one processor) 1856 1857 Notes: 1858 The number of rows and columns must be divisible by blocksize. 1859 1860 The user MUST specify either the local or global matrix dimensions 1861 (possibly both). 1862 1863 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1864 then it must be used on all processors that share the object for that argument. 1865 1866 If the *_nnz parameter is given then the *_nz parameter is ignored. 1867 1868 Storage Information: 1869 For a square global matrix we define each processor's diagonal portion 1870 to be its local rows and the corresponding columns (a square submatrix); 1871 each processor's off-diagonal portion encompasses the remainder of the 1872 local matrix (a rectangular submatrix). 1873 1874 The user can specify preallocated storage for the diagonal part of 1875 the local submatrix with either d_nz or d_nnz (not both). Set 1876 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1877 memory allocation. Likewise, specify preallocated storage for the 1878 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1879 1880 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1881 the figure below we depict these three local rows and all columns (0-11). 1882 1883 .vb 1884 0 1 2 3 4 5 6 7 8 9 10 11 1885 ------------------- 1886 row 3 | o o o d d d o o o o o o 1887 row 4 | o o o d d d o o o o o o 1888 row 5 | o o o d d d o o o o o o 1889 ------------------- 1890 .ve 1891 1892 Thus, any entries in the d locations are stored in the d (diagonal) 1893 submatrix, and any entries in the o locations are stored in the 1894 o (off-diagonal) submatrix. Note that the d matrix is stored in 1895 MATSEQSBAIJ format and the o submatrix in MATSEQBAIJ format. 1896 1897 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 1898 plus the diagonal part of the d matrix, 1899 and o_nz should indicate the number of block nonzeros per row in the o matrix. 1900 In general, for PDE problems in which most nonzeros are near the diagonal, 1901 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1902 or you will get TERRIBLE performance; see the users' manual chapter on 1903 matrices.
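   Example usage:

   The fragment below is only an illustrative sketch; the communicator, sizes, and nonzero
   estimates are invented for this example and are not recommendations. It creates a 12x12
   symmetric matrix with block size 2 on two processes, each owning 6 rows, preallocates 2
   block nonzeros per block row in the d part and 1 in the o part, and inserts one upper
   triangular entry before assembling.

.vb
      Mat            A;
      PetscErrorCode ierr;
      PetscInt       row = 0,col = 2;
      PetscScalar    v = 1.0;

      ierr = MatCreateMPISBAIJ(PETSC_COMM_WORLD,2,6,6,12,12,2,PETSC_NULL,1,PETSC_NULL,&A);CHKERRQ(ierr);
      /* only the upper triangular portion is stored, so set only entries with col >= row */
      ierr = MatSetValues(A,1,&row,1,&col,&v,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatDestroy(A);CHKERRQ(ierr);
.ve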
1904 1905 Level: intermediate 1906 1907 .keywords: matrix, block, aij, compressed row, sparse, parallel 1908 1909 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1910 @*/ 1911 1912 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPISBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 1913 { 1914 PetscErrorCode ierr; 1915 PetscMPIInt size; 1916 1917 PetscFunctionBegin; 1918 ierr = MatCreate(comm,A);CHKERRQ(ierr); 1919 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 1920 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1921 if (size > 1) { 1922 ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr); 1923 ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 1924 } else { 1925 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 1926 ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 1927 } 1928 PetscFunctionReturn(0); 1929 } 1930 1931 1932 #undef __FUNCT__ 1933 #define __FUNCT__ "MatDuplicate_MPISBAIJ" 1934 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1935 { 1936 Mat mat; 1937 Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data; 1938 PetscErrorCode ierr; 1939 PetscInt len=0,nt,bs=matin->bs,mbs=oldmat->mbs; 1940 PetscScalar *array; 1941 1942 PetscFunctionBegin; 1943 *newmat = 0; 1944 ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr); 1945 ierr = MatSetSizes(mat,matin->m,matin->n,matin->M,matin->N);CHKERRQ(ierr); 1946 ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 1947 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1948 1949 mat->factor = matin->factor; 1950 mat->preallocated = PETSC_TRUE; 1951 mat->assembled = PETSC_TRUE; 1952 mat->insertmode = NOT_SET_VALUES; 1953 1954 a = (Mat_MPISBAIJ*)mat->data; 1955 mat->bs = matin->bs; 1956 a->bs2 = oldmat->bs2; 1957 a->mbs = oldmat->mbs; 1958 a->nbs = oldmat->nbs; 1959 a->Mbs = oldmat->Mbs; 1960 a->Nbs = oldmat->Nbs; 1961 1962 a->rstart = oldmat->rstart; 1963 a->rend = oldmat->rend; 1964 a->cstart = oldmat->cstart; 1965 a->cend = oldmat->cend; 1966 a->size = oldmat->size; 1967 a->rank = oldmat->rank; 1968 a->donotstash = oldmat->donotstash; 1969 a->roworiented = oldmat->roworiented; 1970 a->rowindices = 0; 1971 a->rowvalues = 0; 1972 a->getrowactive = PETSC_FALSE; 1973 a->barray = 0; 1974 a->rstart_bs = oldmat->rstart_bs; 1975 a->rend_bs = oldmat->rend_bs; 1976 a->cstart_bs = oldmat->cstart_bs; 1977 a->cend_bs = oldmat->cend_bs; 1978 1979 /* hash table stuff */ 1980 a->ht = 0; 1981 a->hd = 0; 1982 a->ht_size = 0; 1983 a->ht_flag = oldmat->ht_flag; 1984 a->ht_fact = oldmat->ht_fact; 1985 a->ht_total_ct = 0; 1986 a->ht_insert_ct = 0; 1987 1988 ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr); 1989 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1990 ierr = MatStashCreate_Private(matin->comm,matin->bs,&mat->bstash);CHKERRQ(ierr); 1991 if (oldmat->colmap) { 1992 #if defined (PETSC_USE_CTABLE) 1993 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1994 #else 1995 ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 1996 ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 1997 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 1998 #endif 1999 } else a->colmap = 0; 2000 2001 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2002 ierr 
= PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2003 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2004 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 2005 } else a->garray = 0; 2006 2007 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2008 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2009 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2010 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2011 2012 ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr); 2013 ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr); 2014 ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr); 2015 ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr); 2016 2017 ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr); 2018 ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr); 2019 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr); 2020 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr); 2021 ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr); 2022 ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr); 2023 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr); 2024 ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr); 2025 ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr); 2026 ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr); 2027 ierr = PetscLogObjectParent(mat,a->slvec0b);CHKERRQ(ierr); 2028 ierr = PetscLogObjectParent(mat,a->slvec1a);CHKERRQ(ierr); 2029 ierr = PetscLogObjectParent(mat,a->slvec1b);CHKERRQ(ierr); 2030 2031 /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */ 2032 ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr); 2033 a->sMvctx = oldmat->sMvctx; 2034 ierr = PetscLogObjectParent(mat,a->sMvctx);CHKERRQ(ierr); 2035 2036 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2037 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2038 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2039 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2040 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 2041 *newmat = mat; 2042 PetscFunctionReturn(0); 2043 } 2044 2045 #include "petscsys.h" 2046 2047 #undef __FUNCT__ 2048 #define __FUNCT__ "MatLoad_MPISBAIJ" 2049 PetscErrorCode MatLoad_MPISBAIJ(PetscViewer viewer, MatType type,Mat *newmat) 2050 { 2051 Mat A; 2052 PetscErrorCode ierr; 2053 PetscInt i,nz,j,rstart,rend; 2054 PetscScalar *vals,*buf; 2055 MPI_Comm comm = ((PetscObject)viewer)->comm; 2056 MPI_Status status; 2057 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,*locrowlens; 2058 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2059 PetscInt *procsnz = 0,jj,*mycols,*ibuf; 2060 PetscInt bs=1,Mbs,mbs,extra_rows; 2061 PetscInt *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2062 PetscInt dcount,kmax,k,nzcount,tmp; 2063 int fd; 2064 2065 PetscFunctionBegin; 2066 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2067 2068 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2069 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2070 if (!rank) { 2071 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2072 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2073 if (header[0] != MAT_FILE_COOKIE)
SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2074 if (header[3] < 0) { 2075 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ"); 2076 } 2077 } 2078 2079 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2080 M = header[1]; N = header[2]; 2081 2082 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2083 2084 /* 2085 This code adds extra rows to make sure the number of rows is 2086 divisible by the blocksize 2087 */ 2088 Mbs = M/bs; 2089 extra_rows = bs - M + bs*(Mbs); 2090 if (extra_rows == bs) extra_rows = 0; 2091 else Mbs++; 2092 if (extra_rows && !rank) { 2093 ierr = PetscLogInfo((0,"MatLoad_MPISBAIJ:Padding loaded matrix to match blocksize\n"));CHKERRQ(ierr); 2094 } 2095 2096 /* determine ownership of all rows */ 2097 mbs = Mbs/size + ((Mbs % size) > rank); 2098 m = mbs*bs; 2099 ierr = PetscMalloc(2*(size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr); 2100 browners = rowners + size + 1; 2101 ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2102 rowners[0] = 0; 2103 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2104 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2105 rstart = rowners[rank]; 2106 rend = rowners[rank+1]; 2107 2108 /* distribute row lengths to all processors */ 2109 ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscMPIInt),&locrowlens);CHKERRQ(ierr); 2110 if (!rank) { 2111 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2112 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2113 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2114 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr); 2115 for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2116 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2117 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2118 } else { 2119 ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2120 } 2121 2122 if (!rank) { /* procs[0] */ 2123 /* calculate the number of nonzeros on each processor */ 2124 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2125 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2126 for (i=0; i<size; i++) { 2127 for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 2128 procsnz[i] += rowlengths[j]; 2129 } 2130 } 2131 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2132 2133 /* determine max buffer needed and allocate it */ 2134 maxnz = 0; 2135 for (i=0; i<size; i++) { 2136 maxnz = PetscMax(maxnz,procsnz[i]); 2137 } 2138 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2139 2140 /* read in my part of the matrix column indices */ 2141 nz = procsnz[0]; 2142 ierr = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 2143 mycols = ibuf; 2144 if (size == 1) nz -= extra_rows; 2145 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2146 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2147 2148 /* read in everyone else's part (except the last proc's) and ship it off */ 2149 for (i=1; i<size-1; i++) { 2150 nz = procsnz[i]; 2151 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2152 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2153 } 2154 /* read in the stuff for the last proc */ 2155 if (size != 1) { 2156 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2157 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2158 for (i=0;
i<extra_rows; i++) cols[nz+i] = M+i; 2159 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 2160 } 2161 ierr = PetscFree(cols);CHKERRQ(ierr); 2162 } else { /* procs[i], i>0 */ 2163 /* determine buffer space needed for message */ 2164 nz = 0; 2165 for (i=0; i<m; i++) { 2166 nz += locrowlens[i]; 2167 } 2168 ierr = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 2169 mycols = ibuf; 2170 /* receive message of column indices */ 2171 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2172 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2173 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2174 } 2175 2176 /* loop over local rows, determining number of off diagonal entries */ 2177 ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 2178 odlens = dlens + (rend-rstart); 2179 ierr = PetscMalloc(3*Mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr); 2180 ierr = PetscMemzero(mask,3*Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2181 masked1 = mask + Mbs; 2182 masked2 = masked1 + Mbs; 2183 rowcount = 0; nzcount = 0; 2184 for (i=0; i<mbs; i++) { 2185 dcount = 0; 2186 odcount = 0; 2187 for (j=0; j<bs; j++) { 2188 kmax = locrowlens[rowcount]; 2189 for (k=0; k<kmax; k++) { 2190 tmp = mycols[nzcount++]/bs; /* block col. index */ 2191 if (!mask[tmp]) { 2192 mask[tmp] = 1; 2193 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */ 2194 else masked1[dcount++] = tmp; /* entry in diag portion */ 2195 } 2196 } 2197 rowcount++; 2198 } 2199 2200 dlens[i] = dcount; /* d_nnz[i] */ 2201 odlens[i] = odcount; /* o_nnz[i] */ 2202 2203 /* zero out the mask elements we set */ 2204 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2205 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2206 } 2207 2208 /* create our matrix */ 2209 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 2210 ierr = MatSetSizes(A,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2211 ierr = MatSetType(A,type);CHKERRQ(ierr); 2212 ierr = MatMPISBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr); 2213 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2214 2215 if (!rank) { 2216 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2217 /* read in my part of the matrix numerical values */ 2218 nz = procsnz[0]; 2219 vals = buf; 2220 mycols = ibuf; 2221 if (size == 1) nz -= extra_rows; 2222 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2223 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2224 2225 /* insert into matrix */ 2226 jj = rstart*bs; 2227 for (i=0; i<m; i++) { 2228 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2229 mycols += locrowlens[i]; 2230 vals += locrowlens[i]; 2231 jj++; 2232 } 2233 2234 /* read in other processors (except the last one) and ship out */ 2235 for (i=1; i<size-1; i++) { 2236 nz = procsnz[i]; 2237 vals = buf; 2238 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2239 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2240 } 2241 /* the last proc */ 2242 if (size != 1){ 2243 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2244 vals = buf; 2245 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2246 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2247 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 2248 } 2249 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2250 2251 } else { 2252 /* receive numeric values */ 2253 ierr =
PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2254 2255 /* receive message of values*/ 2256 vals = buf; 2257 mycols = ibuf; 2258 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2259 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2260 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2261 2262 /* insert into matrix */ 2263 jj = rstart*bs; 2264 for (i=0; i<m; i++) { 2265 ierr = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2266 mycols += locrowlens[i]; 2267 vals += locrowlens[i]; 2268 jj++; 2269 } 2270 } 2271 2272 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2273 ierr = PetscFree(buf);CHKERRQ(ierr); 2274 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2275 ierr = PetscFree(rowners);CHKERRQ(ierr); 2276 ierr = PetscFree(dlens);CHKERRQ(ierr); 2277 ierr = PetscFree(mask);CHKERRQ(ierr); 2278 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2279 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2280 *newmat = A; 2281 PetscFunctionReturn(0); 2282 } 2283 2284 #undef __FUNCT__ 2285 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor" 2286 /*XXXXX@ 2287 MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2288 2289 Input Parameters: 2290 . mat - the matrix 2291 . fact - factor 2292 2293 Collective on Mat 2294 2295 Level: advanced 2296 2297 Notes: 2298 This can also be set by the command line option: -mat_use_hash_table fact 2299 2300 .keywords: matrix, hashtable, factor, HT 2301 2302 .seealso: MatSetOption() 2303 @XXXXX*/ 2304 2305 2306 #undef __FUNCT__ 2307 #define __FUNCT__ "MatGetRowMax_MPISBAIJ" 2308 PetscErrorCode MatGetRowMax_MPISBAIJ(Mat A,Vec v) 2309 { 2310 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 2311 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(a->B)->data; 2312 PetscReal atmp; 2313 PetscReal *work,*svalues,*rvalues; 2314 PetscErrorCode ierr; 2315 PetscInt i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol; 2316 PetscMPIInt rank,size; 2317 PetscInt *rowners_bs,dest,count,source; 2318 PetscScalar *va; 2319 MatScalar *ba; 2320 MPI_Status stat; 2321 2322 PetscFunctionBegin; 2323 ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr); 2324 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2325 2326 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 2327 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 2328 2329 bs = A->bs; 2330 mbs = a->mbs; 2331 Mbs = a->Mbs; 2332 ba = b->a; 2333 bi = b->i; 2334 bj = b->j; 2335 2336 /* find ownerships */ 2337 rowners_bs = a->rowners_bs; 2338 2339 /* each proc creates an array to be distributed */ 2340 ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr); 2341 ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr); 2342 2343 /* row_max for B */ 2344 if (rank != size-1){ 2345 for (i=0; i<mbs; i++) { 2346 ncols = bi[1] - bi[0]; bi++; 2347 brow = bs*i; 2348 for (j=0; j<ncols; j++){ 2349 bcol = bs*(*bj); 2350 for (kcol=0; kcol<bs; kcol++){ 2351 col = bcol + kcol; /* local col index */ 2352 col += rowners_bs[rank+1]; /* global col index */ 2353 for (krow=0; krow<bs; krow++){ 2354 atmp = PetscAbsScalar(*ba); ba++; 2355 row = brow + krow; /* local row index */ 2356 if (PetscRealPart(va[row]) < atmp) va[row] = atmp; 2357 if (work[col] < atmp) work[col] = atmp; 2358 } 2359 } 2360 bj++; 2361 } 2362 } 2363 2364 /* send values to its owners */ 2365 for (dest=rank+1; dest<size; dest++){ 2366 svalues = work + rowners_bs[dest]; 2367 count = rowners_bs[dest+1]-rowners_bs[dest]; 2368 ierr = 
MPI_Send(svalues,count,MPIU_REAL,dest,rank,A->comm);CHKERRQ(ierr); 2369 } 2370 } 2371 2372 /* receive values */ 2373 if (rank){ 2374 rvalues = work; 2375 count = rowners_bs[rank+1]-rowners_bs[rank]; 2376 for (source=0; source<rank; source++){ 2377 ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,A->comm,&stat);CHKERRQ(ierr); 2378 /* process values */ 2379 for (i=0; i<count; i++){ 2380 if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i]; 2381 } 2382 } 2383 } 2384 2385 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2386 ierr = PetscFree(work);CHKERRQ(ierr); 2387 PetscFunctionReturn(0); 2388 } 2389 2390 #undef __FUNCT__ 2391 #define __FUNCT__ "MatRelax_MPISBAIJ" 2392 PetscErrorCode MatRelax_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2393 { 2394 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2395 PetscErrorCode ierr; 2396 PetscInt mbs=mat->mbs,bs=matin->bs; 2397 PetscScalar *x,*b,*ptr,zero=0.0; 2398 Vec bb1; 2399 2400 PetscFunctionBegin; 2401 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 2402 if (bs > 1) 2403 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2404 2405 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2406 if ( flag & SOR_ZERO_INITIAL_GUESS ) { 2407 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2408 its--; 2409 } 2410 2411 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2412 while (its--){ 2413 2414 /* lower triangular part: slvec0b = - B^T*xx */ 2415 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2416 2417 /* copy xx into slvec0a */ 2418 ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2419 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2420 ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2421 ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2422 2423 ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr); 2424 2425 /* copy bb into slvec1a */ 2426 ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2427 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 2428 ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2429 ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2430 2431 /* set slvec1b = 0 */ 2432 ierr = VecSet(mat->slvec1b,zero);CHKERRQ(ierr); 2433 2434 ierr = VecScatterBegin(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr); 2435 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2436 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 2437 ierr = VecScatterEnd(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr); 2438 2439 /* upper triangular part: bb1 = bb1 - B*x */ 2440 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr); 2441 2442 /* local diagonal sweep */ 2443 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2444 } 2445 ierr = VecDestroy(bb1);CHKERRQ(ierr); 2446 } else { 2447 SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2448 } 2449 PetscFunctionReturn(0); 2450 } 2451 2452 #undef __FUNCT__ 2453 #define __FUNCT__ "MatRelax_MPISBAIJ_2comm" 2454 PetscErrorCode MatRelax_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2455 { 2456 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2457 PetscErrorCode ierr; 2458 Vec lvec1,bb1; 2459 2460 
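  /* Note: this is an alternative SSOR kernel to MatRelax_MPISBAIJ() above. Instead of packing xx and bb into
     the merged work vectors slvec0/slvec1 and using the single scatter sMvctx, it reuses the BAIJ-style
     lvec/Mvctx machinery, which requires two communication phases per iteration (a forward scatter of xx
     and a reverse scatter of the lower triangular contribution) - hence the _2comm suffix. */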
PetscFunctionBegin; 2461 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 2462 if (matin->bs > 1) 2463 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2464 2465 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2466 if ( flag & SOR_ZERO_INITIAL_GUESS ) { 2467 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2468 its--; 2469 } 2470 2471 ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr); 2472 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2473 while (its--){ 2474 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 2475 2476 /* lower diagonal part: bb1 = bb - B^T*xx */ 2477 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr); 2478 ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr); 2479 2480 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 2481 ierr = VecCopy(bb,bb1);CHKERRQ(ierr); 2482 ierr = VecScatterBegin(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr); 2483 2484 /* upper diagonal part: bb1 = bb1 - B*x */ 2485 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2486 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 2487 2488 ierr = VecScatterEnd(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr); 2489 2490 /* diagonal sweep */ 2491 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2492 } 2493 ierr = VecDestroy(lvec1);CHKERRQ(ierr); 2494 ierr = VecDestroy(bb1);CHKERRQ(ierr); 2495 } else { 2496 SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2497 } 2498 PetscFunctionReturn(0); 2499 } 2500 2501