1 #define PETSCMAT_DLL 2 3 #include "src/mat/impls/baij/mpi/mpibaij.h" /*I "petscmat.h" I*/ 4 #include "mpisbaij.h" 5 #include "src/mat/impls/sbaij/seq/sbaij.h" 6 7 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat); 8 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat); 9 EXTERN PetscErrorCode DisAssemble_MPISBAIJ(Mat); 10 EXTERN PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt); 11 EXTERN PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []); 12 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []); 13 EXTERN PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode); 14 EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 15 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 16 EXTERN PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**); 17 EXTERN PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**); 18 EXTERN PetscErrorCode MatPrintHelp_SeqSBAIJ(Mat); 19 EXTERN PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*); 20 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *); 21 EXTERN PetscErrorCode MatGetRowMax_MPISBAIJ(Mat,Vec); 22 EXTERN PetscErrorCode MatRelax_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec); 23 24 /* UGLY, ugly, ugly 25 When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does 26 not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and 27 inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ() 28 converts the entries into single precision and then calls ..._MatScalar() to put them 29 into the single precision data structures. 30 */ 31 #if defined(PETSC_USE_MAT_SINGLE) 32 EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode); 33 EXTERN PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode); 34 EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode); 35 EXTERN PetscErrorCode MatSetValues_MPISBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode); 36 EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode); 37 #else 38 #define MatSetValuesBlocked_SeqSBAIJ_MatScalar MatSetValuesBlocked_SeqSBAIJ 39 #define MatSetValues_MPISBAIJ_MatScalar MatSetValues_MPISBAIJ 40 #define MatSetValuesBlocked_MPISBAIJ_MatScalar MatSetValuesBlocked_MPISBAIJ 41 #define MatSetValues_MPISBAIJ_HT_MatScalar MatSetValues_MPISBAIJ_HT 42 #define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar MatSetValuesBlocked_MPISBAIJ_HT 43 #endif 44 45 EXTERN_C_BEGIN 46 #undef __FUNCT__ 47 #define __FUNCT__ "MatStoreValues_MPISBAIJ" 48 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPISBAIJ(Mat mat) 49 { 50 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 51 PetscErrorCode ierr; 52 53 PetscFunctionBegin; 54 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 55 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 56 PetscFunctionReturn(0); 57 } 58 EXTERN_C_END 59 60 EXTERN_C_BEGIN 61 #undef __FUNCT__ 62 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ" 63 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPISBAIJ(Mat mat) 64 { 65 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 66 PetscErrorCode ierr; 67 68 PetscFunctionBegin; 69 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 70 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 71 PetscFunctionReturn(0); 72 } 73 EXTERN_C_END 74 75 76 #define CHUNKSIZE 10 77 78 #define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \ 79 { \ 80 \ 81 brow = row/bs; \ 82 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 83 rmax = aimax[brow]; nrow = ailen[brow]; \ 84 bcol = col/bs; \ 85 ridx = row % bs; cidx = col % bs; \ 86 low = 0; high = nrow; \ 87 while (high-low > 3) { \ 88 t = (low+high)/2; \ 89 if (rp[t] > bcol) high = t; \ 90 else low = t; \ 91 } \ 92 for (_i=low; _i<high; _i++) { \ 93 if (rp[_i] > bcol) break; \ 94 if (rp[_i] == bcol) { \ 95 bap = ap + bs2*_i + bs*cidx + ridx; \ 96 if (addv == ADD_VALUES) *bap += value; \ 97 else *bap = value; \ 98 goto a_noinsert; \ 99 } \ 100 } \ 101 if (a->nonew == 1) goto a_noinsert; \ 102 if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 103 MatSeqXAIJReallocateAIJ(a,bs2,nrow,brow,bcol,rmax,aa,ai,aj,a->mbs,rp,ap,aimax,a->nonew); \ 104 N = nrow++ - 1; \ 105 /* shift up all the later entries in this row */ \ 106 for (ii=N; ii>=_i; ii--) { \ 107 rp[ii+1] = rp[ii]; \ 108 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 109 } \ 110 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 111 rp[_i] = bcol; \ 112 ap[bs2*_i + bs*cidx + ridx] = value; \ 113 a_noinsert:; \ 114 ailen[brow] = nrow; \ 115 } 116 #ifndef MatSetValues_SeqBAIJ_B_Private 117 #define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \ 118 { \ 119 brow = row/bs; \ 120 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 121 rmax = bimax[brow]; nrow = bilen[brow]; \ 122 bcol = col/bs; \ 123 ridx = row % bs; cidx = col % bs; \ 124 low = 0; high = nrow; \ 125 while (high-low > 3) { \ 126 t = (low+high)/2; \ 127 if (rp[t] > bcol) high = t; \ 128 else low = t; \ 129 } \ 130 for (_i=low; _i<high; _i++) { \ 131 if (rp[_i] > bcol) break; \ 132 if (rp[_i] == bcol) { \ 133 bap = ap + bs2*_i + bs*cidx + ridx; \ 134 if (addv == ADD_VALUES) *bap += value; \ 135 else *bap = value; \ 136 goto b_noinsert; \ 137 } \ 138 } \ 139 if (b->nonew == 1) goto b_noinsert; \ 140 if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 141 MatSeqXAIJReallocateAIJ(b,bs2,nrow,brow,bcol,rmax,ba,bi,bj,b->mbs,rp,ap,bimax,b->nonew); \ 142 N = nrow++ - 1; \ 143 /* shift up all the later entries in this row */ \ 144 for (ii=N; ii>=_i; ii--) { \ 145 rp[ii+1] = rp[ii]; \ 146 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 147 } \ 148 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 149 rp[_i] = bcol; \ 150 ap[bs2*_i + bs*cidx + ridx] = value; \ 151 b_noinsert:; \ 152 bilen[brow] = nrow; \ 153 } 154 #endif 155 156 #if defined(PETSC_USE_MAT_SINGLE) 157 #undef __FUNCT__ 158 #define __FUNCT__ "MatSetValues_MPISBAIJ" 159 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 160 { 161 Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data; 162 PetscErrorCode ierr; 163 PetscInt i,N = m*n; 164 MatScalar *vsingle; 165 166 PetscFunctionBegin; 167 if (N > b->setvalueslen) { 168 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 169 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 170 b->setvalueslen = N; 171 } 172 vsingle = b->setvaluescopy; 173 174 for (i=0; i<N; i++) { 175 vsingle[i] = v[i]; 176 } 177 ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 178 PetscFunctionReturn(0); 179 } 180 181 #undef __FUNCT__ 182 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ" 183 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 184 { 185 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 186 PetscErrorCode ierr; 187 PetscInt i,N = m*n*b->bs2; 188 MatScalar *vsingle; 189 190 PetscFunctionBegin; 191 if (N > b->setvalueslen) { 192 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 193 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 194 b->setvalueslen = N; 195 } 196 vsingle = b->setvaluescopy; 197 for (i=0; i<N; i++) { 198 vsingle[i] = v[i]; 199 } 200 ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 201 PetscFunctionReturn(0); 202 } 203 #endif 204 205 /* Only add/insert a(i,j) with i<=j (blocks). 206 Any a(i,j) with i>j input by user is ingored. 207 */ 208 #undef __FUNCT__ 209 #define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar" 210 PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 211 { 212 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 213 MatScalar value; 214 PetscTruth roworiented = baij->roworiented; 215 PetscErrorCode ierr; 216 PetscInt i,j,row,col; 217 PetscInt rstart_orig=baij->rstart_bs; 218 PetscInt rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs; 219 PetscInt cend_orig=baij->cend_bs,bs=mat->bs; 220 221 /* Some Variables required in the macro */ 222 Mat A = baij->A; 223 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data; 224 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 225 MatScalar *aa=a->a; 226 227 Mat B = baij->B; 228 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 229 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 230 MatScalar *ba=b->a; 231 232 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 233 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 234 MatScalar *ap,*bap; 235 236 /* for stash */ 237 PetscInt n_loc, *in_loc=0; 238 MatScalar *v_loc=0; 239 240 PetscFunctionBegin; 241 242 if(!baij->donotstash){ 243 ierr = PetscMalloc(n*sizeof(PetscInt),&in_loc);CHKERRQ(ierr); 244 ierr = PetscMalloc(n*sizeof(MatScalar),&v_loc);CHKERRQ(ierr); 245 } 246 247 for (i=0; i<m; i++) { 248 if (im[i] < 0) continue; 249 #if defined(PETSC_USE_DEBUG) 250 if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1); 251 #endif 252 if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */ 253 row = im[i] - rstart_orig; /* local row index */ 254 for (j=0; j<n; j++) { 255 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 256 if (in[j] >= cstart_orig && in[j] < cend_orig){ /* diag entry (A) */ 257 col = in[j] - cstart_orig; /* local col index */ 258 brow = row/bs; bcol = col/bs; 259 if (brow > bcol) continue; /* ignore lower triangular blocks of A */ 260 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 261 MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv); 262 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 263 } else if (in[j] < 0) continue; 264 #if defined(PETSC_USE_DEBUG) 265 else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);} 266 #endif 267 else { /* off-diag entry (B) */ 268 if (mat->was_assembled) { 269 if (!baij->colmap) { 270 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 271 } 272 #if defined (PETSC_USE_CTABLE) 273 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 274 col = col - 1; 275 #else 276 col = baij->colmap[in[j]/bs] - 1; 277 #endif 278 if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) { 279 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 280 col = in[j]; 281 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 282 B = baij->B; 283 b = (Mat_SeqBAIJ*)(B)->data; 284 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 285 ba=b->a; 286 } else col += in[j]%bs; 287 } else col = in[j]; 288 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 289 MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv); 290 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 291 } 292 } 293 } else { /* off processor entry */ 294 if (!baij->donotstash) { 295 n_loc = 0; 296 for (j=0; j<n; j++){ 297 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 298 in_loc[n_loc] = in[j]; 299 if (roworiented) { 300 v_loc[n_loc] = v[i*n+j]; 301 } else { 302 v_loc[n_loc] = v[j*m+i]; 303 } 304 n_loc++; 305 } 306 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr); 307 } 308 } 309 } 310 311 if(!baij->donotstash){ 312 ierr = PetscFree(in_loc);CHKERRQ(ierr); 313 ierr = PetscFree(v_loc);CHKERRQ(ierr); 314 } 315 PetscFunctionReturn(0); 316 } 317 318 #undef __FUNCT__ 319 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_MatScalar" 320 PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 321 { 322 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 323 const MatScalar *value; 324 MatScalar *barray=baij->barray; 325 PetscTruth roworiented = baij->roworiented; 326 PetscErrorCode ierr; 327 PetscInt i,j,ii,jj,row,col,rstart=baij->rstart; 328 PetscInt rend=baij->rend,cstart=baij->cstart,stepval; 329 PetscInt cend=baij->cend,bs=mat->bs,bs2=baij->bs2; 330 331 PetscFunctionBegin; 332 if(!barray) { 333 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 334 baij->barray = barray; 335 } 336 337 if (roworiented) { 338 stepval = (n-1)*bs; 339 } else { 340 stepval = (m-1)*bs; 341 } 342 for (i=0; i<m; i++) { 343 if (im[i] < 0) continue; 344 #if defined(PETSC_USE_DEBUG) 345 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 346 #endif 347 if (im[i] >= rstart && im[i] < rend) { 348 row = im[i] - rstart; 349 for (j=0; j<n; j++) { 350 /* If NumCol = 1 then a copy is not required */ 351 if ((roworiented) && (n == 1)) { 352 barray = (MatScalar*) v + i*bs2; 353 } else if((!roworiented) && (m == 1)) { 354 barray = (MatScalar*) v + j*bs2; 355 } else { /* Here a copy is required */ 356 if (roworiented) { 357 value = v + i*(stepval+bs)*bs + j*bs; 358 } else { 359 value = v + j*(stepval+bs)*bs + i*bs; 360 } 361 for (ii=0; ii<bs; ii++,value+=stepval) { 362 for (jj=0; jj<bs; jj++) { 363 *barray++ = *value++; 364 } 365 } 366 barray -=bs2; 367 } 368 369 if (in[j] >= cstart && in[j] < cend){ 370 col = in[j] - cstart; 371 ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 372 } 373 else if (in[j] < 0) continue; 374 #if defined(PETSC_USE_DEBUG) 375 else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);} 376 #endif 377 else { 378 if (mat->was_assembled) { 379 if (!baij->colmap) { 380 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 381 } 382 383 #if defined(PETSC_USE_DEBUG) 384 #if defined (PETSC_USE_CTABLE) 385 { PetscInt data; 386 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 387 if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 388 } 389 #else 390 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 391 #endif 392 #endif 393 #if defined (PETSC_USE_CTABLE) 394 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 395 col = (col - 1)/bs; 396 #else 397 col = (baij->colmap[in[j]] - 1)/bs; 398 #endif 399 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 400 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 401 col = in[j]; 402 } 403 } 404 else col = in[j]; 405 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 406 } 407 } 408 } else { 409 if (!baij->donotstash) { 410 if (roworiented) { 411 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 412 } else { 413 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 414 } 415 } 416 } 417 } 418 PetscFunctionReturn(0); 419 } 420 421 #undef __FUNCT__ 422 #define __FUNCT__ "MatGetValues_MPISBAIJ" 423 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 424 { 425 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 426 PetscErrorCode ierr; 427 PetscInt bs=mat->bs,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs; 428 PetscInt bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data; 429 430 PetscFunctionBegin; 431 for (i=0; i<m; i++) { 432 if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); 433 if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1); 434 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 435 row = idxm[i] - bsrstart; 436 for (j=0; j<n; j++) { 437 if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); 438 if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1); 439 if (idxn[j] >= bscstart && idxn[j] < bscend){ 440 col = idxn[j] - bscstart; 441 ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 442 } else { 443 if (!baij->colmap) { 444 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 445 } 446 #if defined (PETSC_USE_CTABLE) 447 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 448 data --; 449 #else 450 data = baij->colmap[idxn[j]/bs]-1; 451 #endif 452 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 453 else { 454 col = data + idxn[j]%bs; 455 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 456 } 457 } 458 } 459 } else { 460 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 461 } 462 } 463 PetscFunctionReturn(0); 464 } 465 466 #undef __FUNCT__ 467 #define __FUNCT__ "MatNorm_MPISBAIJ" 468 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm) 469 { 470 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 471 PetscErrorCode ierr; 472 PetscReal sum[2],*lnorm2; 473 474 PetscFunctionBegin; 475 if (baij->size == 1) { 476 ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr); 477 } else { 478 if (type == NORM_FROBENIUS) { 479 ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr); 480 ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr); 481 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++; /* squar power of norm(A) */ 482 ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr); 483 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--; /* squar power of norm(B) */ 484 ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 485 *norm = sqrt(sum[0] + 2*sum[1]); 486 ierr = PetscFree(lnorm2);CHKERRQ(ierr); 487 } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */ 488 Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data; 489 Mat_SeqBAIJ *bmat=(Mat_SeqBAIJ*)baij->B->data; 490 PetscReal *rsum,*rsum2,vabs; 491 PetscInt *jj,*garray=baij->garray,rstart=baij->rstart,nz; 492 PetscInt brow,bcol,col,bs=baij->A->bs,row,grow,gcol,mbs=amat->mbs; 493 MatScalar *v; 494 495 ierr = PetscMalloc((2*mat->N+1)*sizeof(PetscReal),&rsum);CHKERRQ(ierr); 496 rsum2 = rsum + mat->N; 497 ierr = PetscMemzero(rsum,mat->N*sizeof(PetscReal));CHKERRQ(ierr); 498 /* Amat */ 499 v = amat->a; jj = amat->j; 500 for (brow=0; brow<mbs; brow++) { 501 grow = bs*(rstart + brow); 502 nz = amat->i[brow+1] - amat->i[brow]; 503 for (bcol=0; bcol<nz; bcol++){ 504 gcol = bs*(rstart + *jj); jj++; 505 for (col=0; col<bs; col++){ 506 for (row=0; row<bs; row++){ 507 vabs = PetscAbsScalar(*v); v++; 508 rsum[gcol+col] += vabs; 509 /* non-diagonal block */ 510 if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs; 511 } 512 } 513 } 514 } 515 /* Bmat */ 516 v = bmat->a; jj = bmat->j; 517 for (brow=0; brow<mbs; brow++) { 518 grow = bs*(rstart + brow); 519 nz = bmat->i[brow+1] - bmat->i[brow]; 520 for (bcol=0; bcol<nz; bcol++){ 521 gcol = bs*garray[*jj]; jj++; 522 for (col=0; col<bs; col++){ 523 for (row=0; row<bs; row++){ 524 vabs = PetscAbsScalar(*v); v++; 525 rsum[gcol+col] += vabs; 526 rsum[grow+row] += vabs; 527 } 528 } 529 } 530 } 531 ierr = MPI_Allreduce(rsum,rsum2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 532 *norm = 0.0; 533 for (col=0; col<mat->N; col++) { 534 if (rsum2[col] > *norm) *norm = rsum2[col]; 535 } 536 ierr = PetscFree(rsum);CHKERRQ(ierr); 537 } else { 538 SETERRQ(PETSC_ERR_SUP,"No support for this norm yet"); 539 } 540 } 541 PetscFunctionReturn(0); 542 } 543 544 #undef __FUNCT__ 545 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ" 546 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode) 547 { 548 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 549 PetscErrorCode ierr; 550 PetscInt nstash,reallocs; 551 InsertMode addv; 552 553 PetscFunctionBegin; 554 if (baij->donotstash) { 555 PetscFunctionReturn(0); 556 } 557 558 /* make sure all processors are either in INSERTMODE or ADDMODE */ 559 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 560 if (addv == (ADD_VALUES|INSERT_VALUES)) { 561 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 562 } 563 mat->insertmode = addv; /* in case this processor had no cache */ 564 565 ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr); 566 ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr); 567 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 568 ierr = PetscLogInfo((0,"MatAssemblyBegin_MPISBAIJ:Stash has %D entries,uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr); 569 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 570 ierr = PetscLogInfo((0,"MatAssemblyBegin_MPISBAIJ:Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr); 571 PetscFunctionReturn(0); 572 } 573 574 #undef __FUNCT__ 575 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ" 576 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode) 577 { 578 Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data; 579 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)baij->A->data; 580 Mat_SeqBAIJ *b=(Mat_SeqBAIJ*)baij->B->data; 581 PetscErrorCode ierr; 582 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 583 PetscInt *row,*col,other_disassembled; 584 PetscMPIInt n; 585 PetscTruth r1,r2,r3; 586 MatScalar *val; 587 InsertMode addv = mat->insertmode; 588 589 PetscFunctionBegin; 590 591 if (!baij->donotstash) { 592 while (1) { 593 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 594 if (!flg) break; 595 596 for (i=0; i<n;) { 597 /* Now identify the consecutive vals belonging to the same row */ 598 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 599 if (j < n) ncols = j-i; 600 else ncols = n-i; 601 /* Now assemble all these values with a single function call */ 602 ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 603 i = j; 604 } 605 } 606 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 607 /* Now process the block-stash. Since the values are stashed column-oriented, 608 set the roworiented flag to column oriented, and after MatSetValues() 609 restore the original flags */ 610 r1 = baij->roworiented; 611 r2 = a->roworiented; 612 r3 = b->roworiented; 613 baij->roworiented = PETSC_FALSE; 614 a->roworiented = PETSC_FALSE; 615 b->roworiented = PETSC_FALSE; 616 while (1) { 617 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 618 if (!flg) break; 619 620 for (i=0; i<n;) { 621 /* Now identify the consecutive vals belonging to the same row */ 622 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 623 if (j < n) ncols = j-i; 624 else ncols = n-i; 625 ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 626 i = j; 627 } 628 } 629 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 630 baij->roworiented = r1; 631 a->roworiented = r2; 632 b->roworiented = r3; 633 } 634 635 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 636 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 637 638 /* determine if any processor has disassembled, if so we must 639 also disassemble ourselfs, in order that we may reassemble. */ 640 /* 641 if nonzero structure of submatrix B cannot change then we know that 642 no processor disassembled thus we can skip this stuff 643 */ 644 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 645 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 646 if (mat->was_assembled && !other_disassembled) { 647 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 648 } 649 } 650 651 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 652 ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */ 653 } 654 b->compressedrow.use = PETSC_TRUE; 655 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 656 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 657 658 if (baij->rowvalues) { 659 ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr); 660 baij->rowvalues = 0; 661 } 662 663 PetscFunctionReturn(0); 664 } 665 666 #undef __FUNCT__ 667 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket" 668 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 669 { 670 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 671 PetscErrorCode ierr; 672 PetscInt bs = mat->bs; 673 PetscMPIInt size = baij->size,rank = baij->rank; 674 PetscTruth iascii,isdraw; 675 PetscViewer sviewer; 676 PetscViewerFormat format; 677 678 PetscFunctionBegin; 679 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 680 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 681 if (iascii) { 682 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 683 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 684 MatInfo info; 685 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 686 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 687 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n", 688 rank,mat->m,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs, 689 mat->bs,(PetscInt)info.memory);CHKERRQ(ierr); 690 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 691 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr); 692 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 693 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr); 694 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 695 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 696 PetscFunctionReturn(0); 697 } else if (format == PETSC_VIEWER_ASCII_INFO) { 698 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 699 PetscFunctionReturn(0); 700 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 701 PetscFunctionReturn(0); 702 } 703 } 704 705 if (isdraw) { 706 PetscDraw draw; 707 PetscTruth isnull; 708 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 709 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 710 } 711 712 if (size == 1) { 713 ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr); 714 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 715 } else { 716 /* assemble the entire matrix onto first processor. */ 717 Mat A; 718 Mat_SeqSBAIJ *Aloc; 719 Mat_SeqBAIJ *Bloc; 720 PetscInt M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 721 MatScalar *a; 722 723 /* Should this be the same type as mat? */ 724 ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr); 725 if (!rank) { 726 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 727 } else { 728 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 729 } 730 ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 731 ierr = MatMPISBAIJSetPreallocation(A,mat->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 732 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 733 734 /* copy over the A part */ 735 Aloc = (Mat_SeqSBAIJ*)baij->A->data; 736 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 737 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 738 739 for (i=0; i<mbs; i++) { 740 rvals[0] = bs*(baij->rstart + i); 741 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 742 for (j=ai[i]; j<ai[i+1]; j++) { 743 col = (baij->cstart+aj[j])*bs; 744 for (k=0; k<bs; k++) { 745 ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 746 col++; a += bs; 747 } 748 } 749 } 750 /* copy over the B part */ 751 Bloc = (Mat_SeqBAIJ*)baij->B->data; 752 ai = Bloc->i; aj = Bloc->j; a = Bloc->a; 753 for (i=0; i<mbs; i++) { 754 rvals[0] = bs*(baij->rstart + i); 755 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 756 for (j=ai[i]; j<ai[i+1]; j++) { 757 col = baij->garray[aj[j]]*bs; 758 for (k=0; k<bs; k++) { 759 ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 760 col++; a += bs; 761 } 762 } 763 } 764 ierr = PetscFree(rvals);CHKERRQ(ierr); 765 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 766 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 767 /* 768 Everyone has to call to draw the matrix since the graphics waits are 769 synchronized across all processors that share the PetscDraw object 770 */ 771 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 772 if (!rank) { 773 ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 774 ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 775 } 776 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 777 ierr = MatDestroy(A);CHKERRQ(ierr); 778 } 779 PetscFunctionReturn(0); 780 } 781 782 #undef __FUNCT__ 783 #define __FUNCT__ "MatView_MPISBAIJ" 784 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer) 785 { 786 PetscErrorCode ierr; 787 PetscTruth iascii,isdraw,issocket,isbinary; 788 789 PetscFunctionBegin; 790 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 791 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 792 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 793 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 794 if (iascii || isdraw || issocket || isbinary) { 795 ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 796 } else { 797 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name); 798 } 799 PetscFunctionReturn(0); 800 } 801 802 #undef __FUNCT__ 803 #define __FUNCT__ "MatDestroy_MPISBAIJ" 804 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat) 805 { 806 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 807 PetscErrorCode ierr; 808 809 PetscFunctionBegin; 810 #if defined(PETSC_USE_LOG) 811 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->M,mat->N); 812 #endif 813 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 814 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 815 ierr = PetscFree(baij->rowners);CHKERRQ(ierr); 816 ierr = MatDestroy(baij->A);CHKERRQ(ierr); 817 ierr = MatDestroy(baij->B);CHKERRQ(ierr); 818 #if defined (PETSC_USE_CTABLE) 819 if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);} 820 #else 821 if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);} 822 #endif 823 if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);} 824 if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 825 if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 826 if (baij->slvec0) { 827 ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr); 828 ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr); 829 } 830 if (baij->slvec1) { 831 ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr); 832 ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr); 833 ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr); 834 } 835 if (baij->sMvctx) {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);} 836 if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);} 837 if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);} 838 if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);} 839 #if defined(PETSC_USE_MAT_SINGLE) 840 if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);} 841 #endif 842 ierr = PetscFree(baij);CHKERRQ(ierr); 843 844 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 845 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 846 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 847 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 848 PetscFunctionReturn(0); 849 } 850 851 #undef __FUNCT__ 852 #define __FUNCT__ "MatMult_MPISBAIJ" 853 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy) 854 { 855 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 856 PetscErrorCode ierr; 857 PetscInt nt,mbs=a->mbs,bs=A->bs; 858 PetscScalar *x,*from,zero=0.0; 859 860 PetscFunctionBegin; 861 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 862 if (nt != A->n) { 863 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 864 } 865 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 866 if (nt != A->m) { 867 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 868 } 869 870 /* diagonal part */ 871 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 872 ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr); 873 874 /* subdiagonal part */ 875 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 876 877 /* copy x into the vec slvec0 */ 878 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 879 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 880 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 881 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 882 883 ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr); 884 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 885 ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr); 886 887 /* supperdiagonal part */ 888 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 889 890 PetscFunctionReturn(0); 891 } 892 893 #undef __FUNCT__ 894 #define __FUNCT__ "MatMult_MPISBAIJ_2comm" 895 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy) 896 { 897 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 898 PetscErrorCode ierr; 899 PetscInt nt; 900 901 PetscFunctionBegin; 902 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 903 if (nt != A->n) { 904 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 905 } 906 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 907 if (nt != A->m) { 908 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 909 } 910 911 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 912 /* do diagonal part */ 913 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 914 /* do supperdiagonal part */ 915 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 916 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 917 /* do subdiagonal part */ 918 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 919 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 920 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 921 922 PetscFunctionReturn(0); 923 } 924 925 #undef __FUNCT__ 926 #define __FUNCT__ "MatMultAdd_MPISBAIJ" 927 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 928 { 929 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 930 PetscErrorCode ierr; 931 PetscInt mbs=a->mbs,bs=A->bs; 932 PetscScalar *x,*from,zero=0.0; 933 934 PetscFunctionBegin; 935 /* 936 PetscSynchronizedPrintf(A->comm," MatMultAdd is called ...\n"); 937 PetscSynchronizedFlush(A->comm); 938 */ 939 /* diagonal part */ 940 ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr); 941 ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr); 942 943 /* subdiagonal part */ 944 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 945 946 /* copy x into the vec slvec0 */ 947 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 948 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 949 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 950 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 951 952 ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr); 953 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 954 ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr); 955 956 /* supperdiagonal part */ 957 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr); 958 959 PetscFunctionReturn(0); 960 } 961 962 #undef __FUNCT__ 963 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm" 964 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz) 965 { 966 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 967 PetscErrorCode ierr; 968 969 PetscFunctionBegin; 970 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 971 /* do diagonal part */ 972 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 973 /* do supperdiagonal part */ 974 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 975 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 976 977 /* do subdiagonal part */ 978 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 979 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 980 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 981 982 PetscFunctionReturn(0); 983 } 984 985 /* 986 This only works correctly for square matrices where the subblock A->A is the 987 diagonal block 988 */ 989 #undef __FUNCT__ 990 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ" 991 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v) 992 { 993 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 994 PetscErrorCode ierr; 995 996 PetscFunctionBegin; 997 /* if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */ 998 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 999 PetscFunctionReturn(0); 1000 } 1001 1002 #undef __FUNCT__ 1003 #define __FUNCT__ "MatScale_MPISBAIJ" 1004 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa) 1005 { 1006 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1007 PetscErrorCode ierr; 1008 1009 PetscFunctionBegin; 1010 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1011 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1012 PetscFunctionReturn(0); 1013 } 1014 1015 #undef __FUNCT__ 1016 #define __FUNCT__ "MatGetRow_MPISBAIJ" 1017 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1018 { 1019 PetscFunctionBegin; 1020 if (matin) SETERRQ(PETSC_ERR_SUP,"MatGetRow is not supported for SBAIJ matrix format"); 1021 PetscFunctionReturn(0); 1022 } 1023 1024 #undef __FUNCT__ 1025 #define __FUNCT__ "MatRestoreRow_MPISBAIJ" 1026 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1027 { 1028 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1029 1030 PetscFunctionBegin; 1031 if (!baij->getrowactive) { 1032 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1033 } 1034 baij->getrowactive = PETSC_FALSE; 1035 PetscFunctionReturn(0); 1036 } 1037 1038 #undef __FUNCT__ 1039 #define __FUNCT__ "MatRealPart_MPISBAIJ" 1040 PetscErrorCode MatRealPart_MPISBAIJ(Mat A) 1041 { 1042 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1043 PetscErrorCode ierr; 1044 1045 PetscFunctionBegin; 1046 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1047 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1048 PetscFunctionReturn(0); 1049 } 1050 1051 #undef __FUNCT__ 1052 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ" 1053 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A) 1054 { 1055 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1056 PetscErrorCode ierr; 1057 1058 PetscFunctionBegin; 1059 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1060 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1061 PetscFunctionReturn(0); 1062 } 1063 1064 #undef __FUNCT__ 1065 #define __FUNCT__ "MatZeroEntries_MPISBAIJ" 1066 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A) 1067 { 1068 Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data; 1069 PetscErrorCode ierr; 1070 1071 PetscFunctionBegin; 1072 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1073 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1074 PetscFunctionReturn(0); 1075 } 1076 1077 #undef __FUNCT__ 1078 #define __FUNCT__ "MatGetInfo_MPISBAIJ" 1079 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1080 { 1081 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data; 1082 Mat A = a->A,B = a->B; 1083 PetscErrorCode ierr; 1084 PetscReal isend[5],irecv[5]; 1085 1086 PetscFunctionBegin; 1087 info->block_size = (PetscReal)matin->bs; 1088 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1089 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1090 isend[3] = info->memory; isend[4] = info->mallocs; 1091 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1092 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1093 isend[3] += info->memory; isend[4] += info->mallocs; 1094 if (flag == MAT_LOCAL) { 1095 info->nz_used = isend[0]; 1096 info->nz_allocated = isend[1]; 1097 info->nz_unneeded = isend[2]; 1098 info->memory = isend[3]; 1099 info->mallocs = isend[4]; 1100 } else if (flag == MAT_GLOBAL_MAX) { 1101 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1102 info->nz_used = irecv[0]; 1103 info->nz_allocated = irecv[1]; 1104 info->nz_unneeded = irecv[2]; 1105 info->memory = irecv[3]; 1106 info->mallocs = irecv[4]; 1107 } else if (flag == MAT_GLOBAL_SUM) { 1108 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1109 info->nz_used = irecv[0]; 1110 info->nz_allocated = irecv[1]; 1111 info->nz_unneeded = irecv[2]; 1112 info->memory = irecv[3]; 1113 info->mallocs = irecv[4]; 1114 } else { 1115 SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1116 } 1117 info->rows_global = (PetscReal)A->M; 1118 info->columns_global = (PetscReal)A->N; 1119 info->rows_local = (PetscReal)A->m; 1120 info->columns_local = (PetscReal)A->N; 1121 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1122 info->fill_ratio_needed = 0; 1123 info->factor_mallocs = 0; 1124 PetscFunctionReturn(0); 1125 } 1126 1127 #undef __FUNCT__ 1128 #define __FUNCT__ "MatSetOption_MPISBAIJ" 1129 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op) 1130 { 1131 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1132 PetscErrorCode ierr; 1133 1134 PetscFunctionBegin; 1135 switch (op) { 1136 case MAT_NO_NEW_NONZERO_LOCATIONS: 1137 case MAT_YES_NEW_NONZERO_LOCATIONS: 1138 case MAT_COLUMNS_UNSORTED: 1139 case MAT_COLUMNS_SORTED: 1140 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1141 case MAT_KEEP_ZEROED_ROWS: 1142 case MAT_NEW_NONZERO_LOCATION_ERR: 1143 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1144 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1145 break; 1146 case MAT_ROW_ORIENTED: 1147 a->roworiented = PETSC_TRUE; 1148 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1149 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1150 break; 1151 case MAT_ROWS_SORTED: 1152 case MAT_ROWS_UNSORTED: 1153 case MAT_YES_NEW_DIAGONALS: 1154 ierr = PetscLogInfo((A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"));CHKERRQ(ierr); 1155 break; 1156 case MAT_COLUMN_ORIENTED: 1157 a->roworiented = PETSC_FALSE; 1158 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1159 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1160 break; 1161 case MAT_IGNORE_OFF_PROC_ENTRIES: 1162 a->donotstash = PETSC_TRUE; 1163 break; 1164 case MAT_NO_NEW_DIAGONALS: 1165 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1166 case MAT_USE_HASH_TABLE: 1167 a->ht_flag = PETSC_TRUE; 1168 break; 1169 case MAT_NOT_SYMMETRIC: 1170 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 1171 case MAT_HERMITIAN: 1172 SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric"); 1173 case MAT_SYMMETRIC: 1174 case MAT_STRUCTURALLY_SYMMETRIC: 1175 case MAT_NOT_HERMITIAN: 1176 case MAT_SYMMETRY_ETERNAL: 1177 case MAT_NOT_SYMMETRY_ETERNAL: 1178 break; 1179 default: 1180 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1181 } 1182 PetscFunctionReturn(0); 1183 } 1184 1185 #undef __FUNCT__ 1186 #define __FUNCT__ "MatTranspose_MPISBAIJ" 1187 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,Mat *B) 1188 { 1189 PetscErrorCode ierr; 1190 PetscFunctionBegin; 1191 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 1192 PetscFunctionReturn(0); 1193 } 1194 1195 #undef __FUNCT__ 1196 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ" 1197 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr) 1198 { 1199 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1200 Mat a=baij->A, b=baij->B; 1201 PetscErrorCode ierr; 1202 PetscInt nv,m,n; 1203 PetscTruth flg; 1204 1205 PetscFunctionBegin; 1206 if (ll != rr){ 1207 ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr); 1208 if (!flg) 1209 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n"); 1210 } 1211 if (!ll) PetscFunctionReturn(0); 1212 1213 ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr); 1214 if (m != n) SETERRQ2(PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n); 1215 1216 ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr); 1217 if (nv!=n) SETERRQ(PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size"); 1218 1219 ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1220 1221 /* left diagonalscale the off-diagonal part */ 1222 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1223 1224 /* scale the diagonal part */ 1225 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1226 1227 /* right diagonalscale the off-diagonal part */ 1228 ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1229 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1230 PetscFunctionReturn(0); 1231 } 1232 1233 #undef __FUNCT__ 1234 #define __FUNCT__ "MatPrintHelp_MPISBAIJ" 1235 PetscErrorCode MatPrintHelp_MPISBAIJ(Mat A) 1236 { 1237 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1238 MPI_Comm comm = A->comm; 1239 static PetscTruth called = PETSC_FALSE; 1240 PetscErrorCode ierr; 1241 1242 PetscFunctionBegin; 1243 if (!a->rank) { 1244 ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr); 1245 } 1246 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1247 ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr); 1248 ierr = (*PetscHelpPrintf)(comm," -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr); 1249 PetscFunctionReturn(0); 1250 } 1251 1252 #undef __FUNCT__ 1253 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ" 1254 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A) 1255 { 1256 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1257 PetscErrorCode ierr; 1258 1259 PetscFunctionBegin; 1260 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1261 PetscFunctionReturn(0); 1262 } 1263 1264 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *); 1265 1266 #undef __FUNCT__ 1267 #define __FUNCT__ "MatEqual_MPISBAIJ" 1268 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag) 1269 { 1270 Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data; 1271 Mat a,b,c,d; 1272 PetscTruth flg; 1273 PetscErrorCode ierr; 1274 1275 PetscFunctionBegin; 1276 a = matA->A; b = matA->B; 1277 c = matB->A; d = matB->B; 1278 1279 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1280 if (flg) { 1281 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1282 } 1283 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1284 PetscFunctionReturn(0); 1285 } 1286 1287 #undef __FUNCT__ 1288 #define __FUNCT__ "MatCopy_MPISBAIJ" 1289 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str) 1290 { 1291 PetscErrorCode ierr; 1292 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1293 Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data; 1294 1295 PetscFunctionBegin; 1296 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1297 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1298 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1299 } else { 1300 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1301 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1302 } 1303 PetscFunctionReturn(0); 1304 } 1305 1306 #undef __FUNCT__ 1307 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ" 1308 PetscErrorCode MatSetUpPreallocation_MPISBAIJ(Mat A) 1309 { 1310 PetscErrorCode ierr; 1311 1312 PetscFunctionBegin; 1313 ierr = MatMPISBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1314 PetscFunctionReturn(0); 1315 } 1316 1317 #include "petscblaslapack.h" 1318 #undef __FUNCT__ 1319 #define __FUNCT__ "MatAXPY_MPISBAIJ" 1320 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1321 { 1322 PetscErrorCode ierr; 1323 PetscInt i; 1324 Mat_MPISBAIJ *xx=(Mat_MPISBAIJ *)X->data,*yy=(Mat_MPISBAIJ *)Y->data; 1325 PetscBLASInt bnz,one=1; 1326 Mat_SeqSBAIJ *xa,*ya; 1327 Mat_SeqBAIJ *xb,*yb; 1328 1329 PetscFunctionBegin; 1330 if (str == SAME_NONZERO_PATTERN) { 1331 PetscScalar alpha = a; 1332 xa = (Mat_SeqSBAIJ *)xx->A->data; 1333 ya = (Mat_SeqSBAIJ *)yy->A->data; 1334 bnz = (PetscBLASInt)xa->nz; 1335 BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one); 1336 xb = (Mat_SeqBAIJ *)xx->B->data; 1337 yb = (Mat_SeqBAIJ *)yy->B->data; 1338 bnz = (PetscBLASInt)xb->nz; 1339 BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one); 1340 } else { 1341 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1342 } 1343 PetscFunctionReturn(0); 1344 } 1345 1346 #undef __FUNCT__ 1347 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ" 1348 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[]) 1349 { 1350 PetscErrorCode ierr; 1351 PetscInt i; 1352 PetscTruth flg; 1353 1354 PetscFunctionBegin; 1355 for (i=0; i<n; i++) { 1356 ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr); 1357 if (!flg) { 1358 SETERRQ(PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices"); 1359 } 1360 } 1361 ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr); 1362 PetscFunctionReturn(0); 1363 } 1364 1365 1366 /* -------------------------------------------------------------------*/ 1367 static struct _MatOps MatOps_Values = { 1368 MatSetValues_MPISBAIJ, 1369 MatGetRow_MPISBAIJ, 1370 MatRestoreRow_MPISBAIJ, 1371 MatMult_MPISBAIJ, 1372 /* 4*/ MatMultAdd_MPISBAIJ, 1373 MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */ 1374 MatMultAdd_MPISBAIJ, 1375 0, 1376 0, 1377 0, 1378 /*10*/ 0, 1379 0, 1380 0, 1381 MatRelax_MPISBAIJ, 1382 MatTranspose_MPISBAIJ, 1383 /*15*/ MatGetInfo_MPISBAIJ, 1384 MatEqual_MPISBAIJ, 1385 MatGetDiagonal_MPISBAIJ, 1386 MatDiagonalScale_MPISBAIJ, 1387 MatNorm_MPISBAIJ, 1388 /*20*/ MatAssemblyBegin_MPISBAIJ, 1389 MatAssemblyEnd_MPISBAIJ, 1390 0, 1391 MatSetOption_MPISBAIJ, 1392 MatZeroEntries_MPISBAIJ, 1393 /*25*/ 0, 1394 0, 1395 0, 1396 0, 1397 0, 1398 /*30*/ MatSetUpPreallocation_MPISBAIJ, 1399 0, 1400 0, 1401 0, 1402 0, 1403 /*35*/ MatDuplicate_MPISBAIJ, 1404 0, 1405 0, 1406 0, 1407 0, 1408 /*40*/ MatAXPY_MPISBAIJ, 1409 MatGetSubMatrices_MPISBAIJ, 1410 MatIncreaseOverlap_MPISBAIJ, 1411 MatGetValues_MPISBAIJ, 1412 MatCopy_MPISBAIJ, 1413 /*45*/ MatPrintHelp_MPISBAIJ, 1414 MatScale_MPISBAIJ, 1415 0, 1416 0, 1417 0, 1418 /*50*/ 0, 1419 0, 1420 0, 1421 0, 1422 0, 1423 /*55*/ 0, 1424 0, 1425 MatSetUnfactored_MPISBAIJ, 1426 0, 1427 MatSetValuesBlocked_MPISBAIJ, 1428 /*60*/ 0, 1429 0, 1430 0, 1431 MatGetPetscMaps_Petsc, 1432 0, 1433 /*65*/ 0, 1434 0, 1435 0, 1436 0, 1437 0, 1438 /*70*/ MatGetRowMax_MPISBAIJ, 1439 0, 1440 0, 1441 0, 1442 0, 1443 /*75*/ 0, 1444 0, 1445 0, 1446 0, 1447 0, 1448 /*80*/ 0, 1449 0, 1450 0, 1451 0, 1452 MatLoad_MPISBAIJ, 1453 /*85*/ 0, 1454 0, 1455 0, 1456 0, 1457 0, 1458 /*90*/ 0, 1459 0, 1460 0, 1461 0, 1462 0, 1463 /*95*/ 0, 1464 0, 1465 0, 1466 0, 1467 0, 1468 /*100*/0, 1469 0, 1470 0, 1471 0, 1472 0, 1473 /*105*/0, 1474 MatRealPart_MPISBAIJ, 1475 MatImaginaryPart_MPISBAIJ 1476 }; 1477 1478 1479 EXTERN_C_BEGIN 1480 #undef __FUNCT__ 1481 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ" 1482 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 1483 { 1484 PetscFunctionBegin; 1485 *a = ((Mat_MPISBAIJ *)A->data)->A; 1486 *iscopy = PETSC_FALSE; 1487 PetscFunctionReturn(0); 1488 } 1489 EXTERN_C_END 1490 1491 EXTERN_C_BEGIN 1492 #undef __FUNCT__ 1493 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ" 1494 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz) 1495 { 1496 Mat_MPISBAIJ *b; 1497 PetscErrorCode ierr; 1498 PetscInt i,mbs,Mbs; 1499 1500 PetscFunctionBegin; 1501 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1502 1503 if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive"); 1504 if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3; 1505 if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1; 1506 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 1507 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 1508 if (d_nnz) { 1509 for (i=0; i<B->m/bs; i++) { 1510 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 1511 } 1512 } 1513 if (o_nnz) { 1514 for (i=0; i<B->m/bs; i++) { 1515 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 1516 } 1517 } 1518 B->preallocated = PETSC_TRUE; 1519 ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr); 1520 ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr); 1521 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1522 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->cmap);CHKERRQ(ierr); 1523 1524 b = (Mat_MPISBAIJ*)B->data; 1525 mbs = B->m/bs; 1526 Mbs = B->M/bs; 1527 if (mbs*bs != B->m) { 1528 SETERRQ2(PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->m,bs); 1529 } 1530 1531 B->bs = bs; 1532 b->bs2 = bs*bs; 1533 b->mbs = mbs; 1534 b->nbs = mbs; 1535 b->Mbs = Mbs; 1536 b->Nbs = Mbs; 1537 1538 ierr = MPI_Allgather(&b->mbs,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr); 1539 b->rowners[0] = 0; 1540 for (i=2; i<=b->size; i++) { 1541 b->rowners[i] += b->rowners[i-1]; 1542 } 1543 b->rstart = b->rowners[b->rank]; 1544 b->rend = b->rowners[b->rank+1]; 1545 b->cstart = b->rstart; 1546 b->cend = b->rend; 1547 for (i=0; i<=b->size; i++) { 1548 b->rowners_bs[i] = b->rowners[i]*bs; 1549 } 1550 b->rstart_bs = b-> rstart*bs; 1551 b->rend_bs = b->rend*bs; 1552 1553 b->cstart_bs = b->cstart*bs; 1554 b->cend_bs = b->cend*bs; 1555 1556 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 1557 ierr = MatSetSizes(b->A,B->m,B->m,B->m,B->m);CHKERRQ(ierr); 1558 ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr); 1559 ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 1560 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 1561 1562 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 1563 ierr = MatSetSizes(b->B,B->m,B->M,B->m,B->M);CHKERRQ(ierr); 1564 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 1565 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 1566 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 1567 1568 /* build cache for off array entries formed */ 1569 ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr); 1570 1571 PetscFunctionReturn(0); 1572 } 1573 EXTERN_C_END 1574 1575 /*MC 1576 MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices, 1577 based on block compressed sparse row format. Only the upper triangular portion of the matrix is stored. 1578 1579 Options Database Keys: 1580 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions() 1581 1582 Level: beginner 1583 1584 .seealso: MatCreateMPISBAIJ 1585 M*/ 1586 1587 EXTERN_C_BEGIN 1588 #undef __FUNCT__ 1589 #define __FUNCT__ "MatCreate_MPISBAIJ" 1590 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPISBAIJ(Mat B) 1591 { 1592 Mat_MPISBAIJ *b; 1593 PetscErrorCode ierr; 1594 PetscTruth flg; 1595 1596 PetscFunctionBegin; 1597 1598 ierr = PetscNew(Mat_MPISBAIJ,&b);CHKERRQ(ierr); 1599 B->data = (void*)b; 1600 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1601 1602 B->ops->destroy = MatDestroy_MPISBAIJ; 1603 B->ops->view = MatView_MPISBAIJ; 1604 B->mapping = 0; 1605 B->factor = 0; 1606 B->assembled = PETSC_FALSE; 1607 1608 B->insertmode = NOT_SET_VALUES; 1609 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 1610 ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr); 1611 1612 /* build local table of row and column ownerships */ 1613 ierr = PetscMalloc(3*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr); 1614 b->cowners = b->rowners + b->size + 2; 1615 b->rowners_bs = b->cowners + b->size + 2; 1616 ierr = PetscLogObjectMemory(B,3*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ));CHKERRQ(ierr); 1617 1618 /* build cache for off array entries formed */ 1619 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 1620 b->donotstash = PETSC_FALSE; 1621 b->colmap = PETSC_NULL; 1622 b->garray = PETSC_NULL; 1623 b->roworiented = PETSC_TRUE; 1624 1625 #if defined(PETSC_USE_MAT_SINGLE) 1626 /* stuff for MatSetValues_XXX in single precision */ 1627 b->setvalueslen = 0; 1628 b->setvaluescopy = PETSC_NULL; 1629 #endif 1630 1631 /* stuff used in block assembly */ 1632 b->barray = 0; 1633 1634 /* stuff used for matrix vector multiply */ 1635 b->lvec = 0; 1636 b->Mvctx = 0; 1637 b->slvec0 = 0; 1638 b->slvec0b = 0; 1639 b->slvec1 = 0; 1640 b->slvec1a = 0; 1641 b->slvec1b = 0; 1642 b->sMvctx = 0; 1643 1644 /* stuff for MatGetRow() */ 1645 b->rowindices = 0; 1646 b->rowvalues = 0; 1647 b->getrowactive = PETSC_FALSE; 1648 1649 /* hash table stuff */ 1650 b->ht = 0; 1651 b->hd = 0; 1652 b->ht_size = 0; 1653 b->ht_flag = PETSC_FALSE; 1654 b->ht_fact = 0; 1655 b->ht_total_ct = 0; 1656 b->ht_insert_ct = 0; 1657 1658 ierr = PetscOptionsHasName(B->prefix,"-mat_use_hash_table",&flg);CHKERRQ(ierr); 1659 if (flg) { 1660 PetscReal fact = 1.39; 1661 ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr); 1662 ierr = PetscOptionsGetReal(B->prefix,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr); 1663 if (fact <= 1.0) fact = 1.39; 1664 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 1665 ierr = PetscLogInfo((0,"MatCreateMPISBAIJ:Hash table Factor used %5.2f\n",fact));CHKERRQ(ierr); 1666 } 1667 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1668 "MatStoreValues_MPISBAIJ", 1669 MatStoreValues_MPISBAIJ);CHKERRQ(ierr); 1670 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1671 "MatRetrieveValues_MPISBAIJ", 1672 MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr); 1673 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 1674 "MatGetDiagonalBlock_MPISBAIJ", 1675 MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr); 1676 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C", 1677 "MatMPISBAIJSetPreallocation_MPISBAIJ", 1678 MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr); 1679 B->symmetric = PETSC_TRUE; 1680 B->structurally_symmetric = PETSC_TRUE; 1681 B->symmetric_set = PETSC_TRUE; 1682 B->structurally_symmetric_set = PETSC_TRUE; 1683 PetscFunctionReturn(0); 1684 } 1685 EXTERN_C_END 1686 1687 /*MC 1688 MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices. 1689 1690 This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator, 1691 and MATMPISBAIJ otherwise. 1692 1693 Options Database Keys: 1694 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions() 1695 1696 Level: beginner 1697 1698 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ 1699 M*/ 1700 1701 EXTERN_C_BEGIN 1702 #undef __FUNCT__ 1703 #define __FUNCT__ "MatCreate_SBAIJ" 1704 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SBAIJ(Mat A) 1705 { 1706 PetscErrorCode ierr; 1707 PetscMPIInt size; 1708 1709 PetscFunctionBegin; 1710 ierr = PetscObjectChangeTypeName((PetscObject)A,MATSBAIJ);CHKERRQ(ierr); 1711 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 1712 if (size == 1) { 1713 ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr); 1714 } else { 1715 ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 1716 } 1717 PetscFunctionReturn(0); 1718 } 1719 EXTERN_C_END 1720 1721 #undef __FUNCT__ 1722 #define __FUNCT__ "MatMPISBAIJSetPreallocation" 1723 /*@C 1724 MatMPISBAIJSetPreallocation - For good matrix assembly performance 1725 the user should preallocate the matrix storage by setting the parameters 1726 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1727 performance can be increased by more than a factor of 50. 1728 1729 Collective on Mat 1730 1731 Input Parameters: 1732 + A - the matrix 1733 . bs - size of blockk 1734 . d_nz - number of block nonzeros per block row in diagonal portion of local 1735 submatrix (same for all local rows) 1736 . d_nnz - array containing the number of block nonzeros in the various block rows 1737 in the upper triangular and diagonal part of the in diagonal portion of the local 1738 (possibly different for each block row) or PETSC_NULL. You must leave room 1739 for the diagonal entry even if it is zero. 1740 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1741 submatrix (same for all local rows). 1742 - o_nnz - array containing the number of nonzeros in the various block rows of the 1743 off-diagonal portion of the local submatrix (possibly different for 1744 each block row) or PETSC_NULL. 1745 1746 1747 Options Database Keys: 1748 . -mat_no_unroll - uses code that does not unroll the loops in the 1749 block calculations (much slower) 1750 . -mat_block_size - size of the blocks to use 1751 1752 Notes: 1753 1754 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1755 than it must be used on all processors that share the object for that argument. 1756 1757 If the *_nnz parameter is given then the *_nz parameter is ignored 1758 1759 Storage Information: 1760 For a square global matrix we define each processor's diagonal portion 1761 to be its local rows and the corresponding columns (a square submatrix); 1762 each processor's off-diagonal portion encompasses the remainder of the 1763 local matrix (a rectangular submatrix). 1764 1765 The user can specify preallocated storage for the diagonal part of 1766 the local submatrix with either d_nz or d_nnz (not both). Set 1767 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1768 memory allocation. Likewise, specify preallocated storage for the 1769 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1770 1771 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1772 the figure below we depict these three local rows and all columns (0-11). 1773 1774 .vb 1775 0 1 2 3 4 5 6 7 8 9 10 11 1776 ------------------- 1777 row 3 | o o o d d d o o o o o o 1778 row 4 | o o o d d d o o o o o o 1779 row 5 | o o o d d d o o o o o o 1780 ------------------- 1781 .ve 1782 1783 Thus, any entries in the d locations are stored in the d (diagonal) 1784 submatrix, and any entries in the o locations are stored in the 1785 o (off-diagonal) submatrix. Note that the d matrix is stored in 1786 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 1787 1788 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 1789 plus the diagonal part of the d matrix, 1790 and o_nz should indicate the number of block nonzeros per row in the o matrix. 1791 In general, for PDE problems in which most nonzeros are near the diagonal, 1792 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1793 or you will get TERRIBLE performance; see the users' manual chapter on 1794 matrices. 1795 1796 Level: intermediate 1797 1798 .keywords: matrix, block, aij, compressed row, sparse, parallel 1799 1800 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1801 @*/ 1802 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 1803 { 1804 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 1805 1806 PetscFunctionBegin; 1807 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 1808 if (f) { 1809 ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 1810 } 1811 PetscFunctionReturn(0); 1812 } 1813 1814 #undef __FUNCT__ 1815 #define __FUNCT__ "MatCreateMPISBAIJ" 1816 /*@C 1817 MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format 1818 (block compressed row). For good matrix assembly performance 1819 the user should preallocate the matrix storage by setting the parameters 1820 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1821 performance can be increased by more than a factor of 50. 1822 1823 Collective on MPI_Comm 1824 1825 Input Parameters: 1826 + comm - MPI communicator 1827 . bs - size of blockk 1828 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1829 This value should be the same as the local size used in creating the 1830 y vector for the matrix-vector product y = Ax. 1831 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1832 This value should be the same as the local size used in creating the 1833 x vector for the matrix-vector product y = Ax. 1834 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 1835 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 1836 . d_nz - number of block nonzeros per block row in diagonal portion of local 1837 submatrix (same for all local rows) 1838 . d_nnz - array containing the number of block nonzeros in the various block rows 1839 in the upper triangular portion of the in diagonal portion of the local 1840 (possibly different for each block block row) or PETSC_NULL. 1841 You must leave room for the diagonal entry even if it is zero. 1842 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1843 submatrix (same for all local rows). 1844 - o_nnz - array containing the number of nonzeros in the various block rows of the 1845 off-diagonal portion of the local submatrix (possibly different for 1846 each block row) or PETSC_NULL. 1847 1848 Output Parameter: 1849 . A - the matrix 1850 1851 Options Database Keys: 1852 . -mat_no_unroll - uses code that does not unroll the loops in the 1853 block calculations (much slower) 1854 . -mat_block_size - size of the blocks to use 1855 . -mat_mpi - use the parallel matrix data structures even on one processor 1856 (defaults to using SeqBAIJ format on one processor) 1857 1858 Notes: 1859 The number of rows and columns must be divisible by blocksize. 1860 1861 The user MUST specify either the local or global matrix dimensions 1862 (possibly both). 1863 1864 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1865 than it must be used on all processors that share the object for that argument. 1866 1867 If the *_nnz parameter is given then the *_nz parameter is ignored 1868 1869 Storage Information: 1870 For a square global matrix we define each processor's diagonal portion 1871 to be its local rows and the corresponding columns (a square submatrix); 1872 each processor's off-diagonal portion encompasses the remainder of the 1873 local matrix (a rectangular submatrix). 1874 1875 The user can specify preallocated storage for the diagonal part of 1876 the local submatrix with either d_nz or d_nnz (not both). Set 1877 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1878 memory allocation. Likewise, specify preallocated storage for the 1879 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1880 1881 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1882 the figure below we depict these three local rows and all columns (0-11). 1883 1884 .vb 1885 0 1 2 3 4 5 6 7 8 9 10 11 1886 ------------------- 1887 row 3 | o o o d d d o o o o o o 1888 row 4 | o o o d d d o o o o o o 1889 row 5 | o o o d d d o o o o o o 1890 ------------------- 1891 .ve 1892 1893 Thus, any entries in the d locations are stored in the d (diagonal) 1894 submatrix, and any entries in the o locations are stored in the 1895 o (off-diagonal) submatrix. Note that the d matrix is stored in 1896 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 1897 1898 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 1899 plus the diagonal part of the d matrix, 1900 and o_nz should indicate the number of block nonzeros per row in the o matrix. 1901 In general, for PDE problems in which most nonzeros are near the diagonal, 1902 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1903 or you will get TERRIBLE performance; see the users' manual chapter on 1904 matrices. 1905 1906 Level: intermediate 1907 1908 .keywords: matrix, block, aij, compressed row, sparse, parallel 1909 1910 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1911 @*/ 1912 1913 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPISBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 1914 { 1915 PetscErrorCode ierr; 1916 PetscMPIInt size; 1917 1918 PetscFunctionBegin; 1919 ierr = MatCreate(comm,A);CHKERRQ(ierr); 1920 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 1921 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1922 if (size > 1) { 1923 ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr); 1924 ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 1925 } else { 1926 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 1927 ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 1928 } 1929 PetscFunctionReturn(0); 1930 } 1931 1932 1933 #undef __FUNCT__ 1934 #define __FUNCT__ "MatDuplicate_MPISBAIJ" 1935 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1936 { 1937 Mat mat; 1938 Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data; 1939 PetscErrorCode ierr; 1940 PetscInt len=0,nt,bs=matin->bs,mbs=oldmat->mbs; 1941 PetscScalar *array; 1942 1943 PetscFunctionBegin; 1944 *newmat = 0; 1945 ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr); 1946 ierr = MatSetSizes(mat,matin->m,matin->n,matin->M,matin->N);CHKERRQ(ierr); 1947 ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 1948 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1949 1950 mat->factor = matin->factor; 1951 mat->preallocated = PETSC_TRUE; 1952 mat->assembled = PETSC_TRUE; 1953 mat->insertmode = NOT_SET_VALUES; 1954 1955 a = (Mat_MPISBAIJ*)mat->data; 1956 mat->bs = matin->bs; 1957 a->bs2 = oldmat->bs2; 1958 a->mbs = oldmat->mbs; 1959 a->nbs = oldmat->nbs; 1960 a->Mbs = oldmat->Mbs; 1961 a->Nbs = oldmat->Nbs; 1962 1963 a->rstart = oldmat->rstart; 1964 a->rend = oldmat->rend; 1965 a->cstart = oldmat->cstart; 1966 a->cend = oldmat->cend; 1967 a->size = oldmat->size; 1968 a->rank = oldmat->rank; 1969 a->donotstash = oldmat->donotstash; 1970 a->roworiented = oldmat->roworiented; 1971 a->rowindices = 0; 1972 a->rowvalues = 0; 1973 a->getrowactive = PETSC_FALSE; 1974 a->barray = 0; 1975 a->rstart_bs = oldmat->rstart_bs; 1976 a->rend_bs = oldmat->rend_bs; 1977 a->cstart_bs = oldmat->cstart_bs; 1978 a->cend_bs = oldmat->cend_bs; 1979 1980 /* hash table stuff */ 1981 a->ht = 0; 1982 a->hd = 0; 1983 a->ht_size = 0; 1984 a->ht_flag = oldmat->ht_flag; 1985 a->ht_fact = oldmat->ht_fact; 1986 a->ht_total_ct = 0; 1987 a->ht_insert_ct = 0; 1988 1989 ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr); 1990 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1991 ierr = MatStashCreate_Private(matin->comm,matin->bs,&mat->bstash);CHKERRQ(ierr); 1992 if (oldmat->colmap) { 1993 #if defined (PETSC_USE_CTABLE) 1994 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1995 #else 1996 ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 1997 ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 1998 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 1999 #endif 2000 } else a->colmap = 0; 2001 2002 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2003 ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2004 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2005 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 2006 } else a->garray = 0; 2007 2008 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2009 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2010 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2011 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2012 2013 ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr); 2014 ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr); 2015 ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr); 2016 ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr); 2017 2018 ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr); 2019 ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr); 2020 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr); 2021 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr); 2022 ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr); 2023 ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr); 2024 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr); 2025 ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr); 2026 ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr); 2027 ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr); 2028 ierr = PetscLogObjectParent(mat,a->slvec0b);CHKERRQ(ierr); 2029 ierr = PetscLogObjectParent(mat,a->slvec1a);CHKERRQ(ierr); 2030 ierr = PetscLogObjectParent(mat,a->slvec1b);CHKERRQ(ierr); 2031 2032 /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */ 2033 ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr); 2034 a->sMvctx = oldmat->sMvctx; 2035 ierr = PetscLogObjectParent(mat,a->sMvctx);CHKERRQ(ierr); 2036 2037 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2038 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2039 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2040 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2041 ierr = PetscFListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr); 2042 *newmat = mat; 2043 PetscFunctionReturn(0); 2044 } 2045 2046 #include "petscsys.h" 2047 2048 #undef __FUNCT__ 2049 #define __FUNCT__ "MatLoad_MPISBAIJ" 2050 PetscErrorCode MatLoad_MPISBAIJ(PetscViewer viewer, MatType type,Mat *newmat) 2051 { 2052 Mat A; 2053 PetscErrorCode ierr; 2054 PetscInt i,nz,j,rstart,rend; 2055 PetscScalar *vals,*buf; 2056 MPI_Comm comm = ((PetscObject)viewer)->comm; 2057 MPI_Status status; 2058 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,*locrowlens; 2059 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2060 PetscInt *procsnz = 0,jj,*mycols,*ibuf; 2061 PetscInt bs=1,Mbs,mbs,extra_rows; 2062 PetscInt *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2063 PetscInt dcount,kmax,k,nzcount,tmp; 2064 int fd; 2065 2066 PetscFunctionBegin; 2067 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2068 2069 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2070 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2071 if (!rank) { 2072 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2073 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2074 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2075 if (header[3] < 0) { 2076 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ"); 2077 } 2078 } 2079 2080 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2081 M = header[1]; N = header[2]; 2082 2083 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2084 2085 /* 2086 This code adds extra rows to make sure the number of rows is 2087 divisible by the blocksize 2088 */ 2089 Mbs = M/bs; 2090 extra_rows = bs - M + bs*(Mbs); 2091 if (extra_rows == bs) extra_rows = 0; 2092 else Mbs++; 2093 if (extra_rows &&!rank) { 2094 ierr = PetscLogInfo((0,"MatLoad_MPISBAIJ:Padding loaded matrix to match blocksize\n"));CHKERRQ(ierr); 2095 } 2096 2097 /* determine ownership of all rows */ 2098 mbs = Mbs/size + ((Mbs % size) > rank); 2099 m = mbs*bs; 2100 ierr = PetscMalloc(2*(size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr); 2101 browners = rowners + size + 1; 2102 ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2103 rowners[0] = 0; 2104 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2105 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2106 rstart = rowners[rank]; 2107 rend = rowners[rank+1]; 2108 2109 /* distribute row lengths to all processors */ 2110 ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscMPIInt),&locrowlens);CHKERRQ(ierr); 2111 if (!rank) { 2112 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2113 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2114 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2115 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr); 2116 for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2117 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2118 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2119 } else { 2120 ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2121 } 2122 2123 if (!rank) { /* procs[0] */ 2124 /* calculate the number of nonzeros on each processor */ 2125 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2126 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2127 for (i=0; i<size; i++) { 2128 for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 2129 procsnz[i] += rowlengths[j]; 2130 } 2131 } 2132 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2133 2134 /* determine max buffer needed and allocate it */ 2135 maxnz = 0; 2136 for (i=0; i<size; i++) { 2137 maxnz = PetscMax(maxnz,procsnz[i]); 2138 } 2139 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2140 2141 /* read in my part of the matrix column indices */ 2142 nz = procsnz[0]; 2143 ierr = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 2144 mycols = ibuf; 2145 if (size == 1) nz -= extra_rows; 2146 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2147 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2148 2149 /* read in every ones (except the last) and ship off */ 2150 for (i=1; i<size-1; i++) { 2151 nz = procsnz[i]; 2152 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2153 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2154 } 2155 /* read in the stuff for the last proc */ 2156 if (size != 1) { 2157 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2158 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2159 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2160 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 2161 } 2162 ierr = PetscFree(cols);CHKERRQ(ierr); 2163 } else { /* procs[i], i>0 */ 2164 /* determine buffer space needed for message */ 2165 nz = 0; 2166 for (i=0; i<m; i++) { 2167 nz += locrowlens[i]; 2168 } 2169 ierr = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 2170 mycols = ibuf; 2171 /* receive message of column indices*/ 2172 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2173 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2174 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2175 } 2176 2177 /* loop over local rows, determining number of off diagonal entries */ 2178 ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 2179 odlens = dlens + (rend-rstart); 2180 ierr = PetscMalloc(3*Mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr); 2181 ierr = PetscMemzero(mask,3*Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2182 masked1 = mask + Mbs; 2183 masked2 = masked1 + Mbs; 2184 rowcount = 0; nzcount = 0; 2185 for (i=0; i<mbs; i++) { 2186 dcount = 0; 2187 odcount = 0; 2188 for (j=0; j<bs; j++) { 2189 kmax = locrowlens[rowcount]; 2190 for (k=0; k<kmax; k++) { 2191 tmp = mycols[nzcount++]/bs; /* block col. index */ 2192 if (!mask[tmp]) { 2193 mask[tmp] = 1; 2194 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */ 2195 else masked1[dcount++] = tmp; /* entry in diag portion */ 2196 } 2197 } 2198 rowcount++; 2199 } 2200 2201 dlens[i] = dcount; /* d_nzz[i] */ 2202 odlens[i] = odcount; /* o_nzz[i] */ 2203 2204 /* zero out the mask elements we set */ 2205 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2206 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2207 } 2208 2209 /* create our matrix */ 2210 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 2211 ierr = MatSetSizes(A,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2212 ierr = MatSetType(A,type);CHKERRQ(ierr); 2213 ierr = MatMPISBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr); 2214 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2215 2216 if (!rank) { 2217 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2218 /* read in my part of the matrix numerical values */ 2219 nz = procsnz[0]; 2220 vals = buf; 2221 mycols = ibuf; 2222 if (size == 1) nz -= extra_rows; 2223 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2224 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2225 2226 /* insert into matrix */ 2227 jj = rstart*bs; 2228 for (i=0; i<m; i++) { 2229 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2230 mycols += locrowlens[i]; 2231 vals += locrowlens[i]; 2232 jj++; 2233 } 2234 2235 /* read in other processors (except the last one) and ship out */ 2236 for (i=1; i<size-1; i++) { 2237 nz = procsnz[i]; 2238 vals = buf; 2239 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2240 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2241 } 2242 /* the last proc */ 2243 if (size != 1){ 2244 nz = procsnz[i] - extra_rows; 2245 vals = buf; 2246 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2247 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2248 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 2249 } 2250 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2251 2252 } else { 2253 /* receive numeric values */ 2254 ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2255 2256 /* receive message of values*/ 2257 vals = buf; 2258 mycols = ibuf; 2259 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2260 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2261 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2262 2263 /* insert into matrix */ 2264 jj = rstart*bs; 2265 for (i=0; i<m; i++) { 2266 ierr = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2267 mycols += locrowlens[i]; 2268 vals += locrowlens[i]; 2269 jj++; 2270 } 2271 } 2272 2273 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2274 ierr = PetscFree(buf);CHKERRQ(ierr); 2275 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2276 ierr = PetscFree(rowners);CHKERRQ(ierr); 2277 ierr = PetscFree(dlens);CHKERRQ(ierr); 2278 ierr = PetscFree(mask);CHKERRQ(ierr); 2279 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2280 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2281 *newmat = A; 2282 PetscFunctionReturn(0); 2283 } 2284 2285 #undef __FUNCT__ 2286 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor" 2287 /*XXXXX@ 2288 MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2289 2290 Input Parameters: 2291 . mat - the matrix 2292 . fact - factor 2293 2294 Collective on Mat 2295 2296 Level: advanced 2297 2298 Notes: 2299 This can also be set by the command line option: -mat_use_hash_table fact 2300 2301 .keywords: matrix, hashtable, factor, HT 2302 2303 .seealso: MatSetOption() 2304 @XXXXX*/ 2305 2306 2307 #undef __FUNCT__ 2308 #define __FUNCT__ "MatGetRowMax_MPISBAIJ" 2309 PetscErrorCode MatGetRowMax_MPISBAIJ(Mat A,Vec v) 2310 { 2311 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 2312 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(a->B)->data; 2313 PetscReal atmp; 2314 PetscReal *work,*svalues,*rvalues; 2315 PetscErrorCode ierr; 2316 PetscInt i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol; 2317 PetscMPIInt rank,size; 2318 PetscInt *rowners_bs,dest,count,source; 2319 PetscScalar *va; 2320 MatScalar *ba; 2321 MPI_Status stat; 2322 2323 PetscFunctionBegin; 2324 ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr); 2325 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2326 2327 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 2328 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 2329 2330 bs = A->bs; 2331 mbs = a->mbs; 2332 Mbs = a->Mbs; 2333 ba = b->a; 2334 bi = b->i; 2335 bj = b->j; 2336 2337 /* find ownerships */ 2338 rowners_bs = a->rowners_bs; 2339 2340 /* each proc creates an array to be distributed */ 2341 ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr); 2342 ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr); 2343 2344 /* row_max for B */ 2345 if (rank != size-1){ 2346 for (i=0; i<mbs; i++) { 2347 ncols = bi[1] - bi[0]; bi++; 2348 brow = bs*i; 2349 for (j=0; j<ncols; j++){ 2350 bcol = bs*(*bj); 2351 for (kcol=0; kcol<bs; kcol++){ 2352 col = bcol + kcol; /* local col index */ 2353 col += rowners_bs[rank+1]; /* global col index */ 2354 for (krow=0; krow<bs; krow++){ 2355 atmp = PetscAbsScalar(*ba); ba++; 2356 row = brow + krow; /* local row index */ 2357 if (PetscRealPart(va[row]) < atmp) va[row] = atmp; 2358 if (work[col] < atmp) work[col] = atmp; 2359 } 2360 } 2361 bj++; 2362 } 2363 } 2364 2365 /* send values to its owners */ 2366 for (dest=rank+1; dest<size; dest++){ 2367 svalues = work + rowners_bs[dest]; 2368 count = rowners_bs[dest+1]-rowners_bs[dest]; 2369 ierr = MPI_Send(svalues,count,MPIU_REAL,dest,rank,A->comm);CHKERRQ(ierr); 2370 } 2371 } 2372 2373 /* receive values */ 2374 if (rank){ 2375 rvalues = work; 2376 count = rowners_bs[rank+1]-rowners_bs[rank]; 2377 for (source=0; source<rank; source++){ 2378 ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,A->comm,&stat);CHKERRQ(ierr); 2379 /* process values */ 2380 for (i=0; i<count; i++){ 2381 if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i]; 2382 } 2383 } 2384 } 2385 2386 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2387 ierr = PetscFree(work);CHKERRQ(ierr); 2388 PetscFunctionReturn(0); 2389 } 2390 2391 #undef __FUNCT__ 2392 #define __FUNCT__ "MatRelax_MPISBAIJ" 2393 PetscErrorCode MatRelax_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2394 { 2395 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2396 PetscErrorCode ierr; 2397 PetscInt mbs=mat->mbs,bs=matin->bs; 2398 PetscScalar *x,*b,*ptr,zero=0.0; 2399 Vec bb1; 2400 2401 PetscFunctionBegin; 2402 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 2403 if (bs > 1) 2404 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2405 2406 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2407 if ( flag & SOR_ZERO_INITIAL_GUESS ) { 2408 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2409 its--; 2410 } 2411 2412 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2413 while (its--){ 2414 2415 /* lower triangular part: slvec0b = - B^T*xx */ 2416 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2417 2418 /* copy xx into slvec0a */ 2419 ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2420 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2421 ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2422 ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2423 2424 ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr); 2425 2426 /* copy bb into slvec1a */ 2427 ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2428 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 2429 ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2430 ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2431 2432 /* set slvec1b = 0 */ 2433 ierr = VecSet(mat->slvec1b,zero);CHKERRQ(ierr); 2434 2435 ierr = VecScatterBegin(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr); 2436 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2437 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 2438 ierr = VecScatterEnd(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr); 2439 2440 /* upper triangular part: bb1 = bb1 - B*x */ 2441 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr); 2442 2443 /* local diagonal sweep */ 2444 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2445 } 2446 ierr = VecDestroy(bb1);CHKERRQ(ierr); 2447 } else { 2448 SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2449 } 2450 PetscFunctionReturn(0); 2451 } 2452 2453 #undef __FUNCT__ 2454 #define __FUNCT__ "MatRelax_MPISBAIJ_2comm" 2455 PetscErrorCode MatRelax_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2456 { 2457 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2458 PetscErrorCode ierr; 2459 Vec lvec1,bb1; 2460 2461 PetscFunctionBegin; 2462 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 2463 if (matin->bs > 1) 2464 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2465 2466 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2467 if ( flag & SOR_ZERO_INITIAL_GUESS ) { 2468 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2469 its--; 2470 } 2471 2472 ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr); 2473 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2474 while (its--){ 2475 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 2476 2477 /* lower diagonal part: bb1 = bb - B^T*xx */ 2478 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr); 2479 ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr); 2480 2481 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 2482 ierr = VecCopy(bb,bb1);CHKERRQ(ierr); 2483 ierr = VecScatterBegin(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr); 2484 2485 /* upper diagonal part: bb1 = bb1 - B*x */ 2486 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2487 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 2488 2489 ierr = VecScatterEnd(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr); 2490 2491 /* diagonal sweep */ 2492 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2493 } 2494 ierr = VecDestroy(lvec1);CHKERRQ(ierr); 2495 ierr = VecDestroy(bb1);CHKERRQ(ierr); 2496 } else { 2497 SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2498 } 2499 PetscFunctionReturn(0); 2500 } 2501 2502