1 #define PETSCMAT_DLL 2 3 #include "src/mat/impls/baij/mpi/mpibaij.h" /*I "petscmat.h" I*/ 4 #include "mpisbaij.h" 5 #include "src/mat/impls/sbaij/seq/sbaij.h" 6 7 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat); 8 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat); 9 EXTERN PetscErrorCode DisAssemble_MPISBAIJ(Mat); 10 EXTERN PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt); 11 EXTERN PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []); 12 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []); 13 EXTERN PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode); 14 EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 15 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 16 EXTERN PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**); 17 EXTERN PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**); 18 EXTERN PetscErrorCode MatPrintHelp_SeqSBAIJ(Mat); 19 EXTERN PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*); 20 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *); 21 EXTERN PetscErrorCode MatGetRowMax_MPISBAIJ(Mat,Vec); 22 EXTERN PetscErrorCode MatRelax_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec); 23 24 /* UGLY, ugly, ugly 25 When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does 26 not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and 27 inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ() 28 converts the entries into single precision and then calls ..._MatScalar() to put them 29 into the single precision data structures. 30 */ 31 #if defined(PETSC_USE_MAT_SINGLE) 32 EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode); 33 EXTERN PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode); 34 EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode); 35 EXTERN PetscErrorCode MatSetValues_MPISBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode); 36 EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode); 37 #else 38 #define MatSetValuesBlocked_SeqSBAIJ_MatScalar MatSetValuesBlocked_SeqSBAIJ 39 #define MatSetValues_MPISBAIJ_MatScalar MatSetValues_MPISBAIJ 40 #define MatSetValuesBlocked_MPISBAIJ_MatScalar MatSetValuesBlocked_MPISBAIJ 41 #define MatSetValues_MPISBAIJ_HT_MatScalar MatSetValues_MPISBAIJ_HT 42 #define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar MatSetValuesBlocked_MPISBAIJ_HT 43 #endif 44 45 EXTERN_C_BEGIN 46 #undef __FUNCT__ 47 #define __FUNCT__ "MatStoreValues_MPISBAIJ" 48 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPISBAIJ(Mat mat) 49 { 50 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 51 PetscErrorCode ierr; 52 53 PetscFunctionBegin; 54 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 55 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 56 PetscFunctionReturn(0); 57 } 58 EXTERN_C_END 59 60 EXTERN_C_BEGIN 61 #undef __FUNCT__ 62 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ" 63 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPISBAIJ(Mat mat) 64 { 65 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 66 PetscErrorCode ierr; 67 68 PetscFunctionBegin; 69 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 70 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 71 PetscFunctionReturn(0); 72 } 73 EXTERN_C_END 74 75 76 #define CHUNKSIZE 10 77 78 #define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \ 79 { \ 80 \ 81 brow = row/bs; \ 82 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 83 rmax = aimax[brow]; nrow = ailen[brow]; \ 84 bcol = col/bs; \ 85 ridx = row % bs; cidx = col % bs; \ 86 low = 0; high = nrow; \ 87 while (high-low > 3) { \ 88 t = (low+high)/2; \ 89 if (rp[t] > bcol) high = t; \ 90 else low = t; \ 91 } \ 92 for (_i=low; _i<high; _i++) { \ 93 if (rp[_i] > bcol) break; \ 94 if (rp[_i] == bcol) { \ 95 bap = ap + bs2*_i + bs*cidx + ridx; \ 96 if (addv == ADD_VALUES) *bap += value; \ 97 else *bap = value; \ 98 goto a_noinsert; \ 99 } \ 100 } \ 101 if (a->nonew == 1) goto a_noinsert; \ 102 if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 103 MatSeqXAIJReallocateAIJ(a,bs2,nrow,brow,bcol,rmax,aa,ai,aj,a->mbs,rp,ap,aimax,a->nonew); \ 104 N = nrow++ - 1; \ 105 /* shift up all the later entries in this row */ \ 106 for (ii=N; ii>=_i; ii--) { \ 107 rp[ii+1] = rp[ii]; \ 108 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 109 } \ 110 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 111 rp[_i] = bcol; \ 112 ap[bs2*_i + bs*cidx + ridx] = value; \ 113 a_noinsert:; \ 114 ailen[brow] = nrow; \ 115 } 116 #ifndef MatSetValues_SeqBAIJ_B_Private 117 #define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \ 118 { \ 119 brow = row/bs; \ 120 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 121 rmax = bimax[brow]; nrow = bilen[brow]; \ 122 bcol = col/bs; \ 123 ridx = row % bs; cidx = col % bs; \ 124 low = 0; high = nrow; \ 125 while (high-low > 3) { \ 126 t = (low+high)/2; \ 127 if (rp[t] > bcol) high = t; \ 128 else low = t; \ 129 } \ 130 for (_i=low; _i<high; _i++) { \ 131 if (rp[_i] > bcol) break; \ 132 if (rp[_i] == bcol) { \ 133 bap = ap + bs2*_i + bs*cidx + ridx; \ 134 if (addv == ADD_VALUES) *bap += value; \ 135 else *bap = value; \ 136 goto b_noinsert; \ 137 } \ 138 } \ 139 if (b->nonew == 1) goto b_noinsert; \ 140 if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 141 MatSeqXAIJReallocateAIJ(b,bs2,nrow,brow,bcol,rmax,ba,bi,bj,b->mbs,rp,ap,bimax,b->nonew); \ 142 N = nrow++ - 1; \ 143 /* shift up all the later entries in this row */ \ 144 for (ii=N; ii>=_i; ii--) { \ 145 rp[ii+1] = rp[ii]; \ 146 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 147 } \ 148 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 149 rp[_i] = bcol; \ 150 ap[bs2*_i + bs*cidx + ridx] = value; \ 151 b_noinsert:; \ 152 bilen[brow] = nrow; \ 153 } 154 #endif 155 156 #if defined(PETSC_USE_MAT_SINGLE) 157 #undef __FUNCT__ 158 #define __FUNCT__ "MatSetValues_MPISBAIJ" 159 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 160 { 161 Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data; 162 PetscErrorCode ierr; 163 PetscInt i,N = m*n; 164 MatScalar *vsingle; 165 166 PetscFunctionBegin; 167 if (N > b->setvalueslen) { 168 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 169 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 170 b->setvalueslen = N; 171 } 172 vsingle = b->setvaluescopy; 173 174 for (i=0; i<N; i++) { 175 vsingle[i] = v[i]; 176 } 177 ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 178 PetscFunctionReturn(0); 179 } 180 181 #undef __FUNCT__ 182 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ" 183 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 184 { 185 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 186 PetscErrorCode ierr; 187 PetscInt i,N = m*n*b->bs2; 188 MatScalar *vsingle; 189 190 PetscFunctionBegin; 191 if (N > b->setvalueslen) { 192 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 193 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 194 b->setvalueslen = N; 195 } 196 vsingle = b->setvaluescopy; 197 for (i=0; i<N; i++) { 198 vsingle[i] = v[i]; 199 } 200 ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 201 PetscFunctionReturn(0); 202 } 203 #endif 204 205 /* Only add/insert a(i,j) with i<=j (blocks). 206 Any a(i,j) with i>j input by user is ingored. 207 */ 208 #undef __FUNCT__ 209 #define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar" 210 PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 211 { 212 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 213 MatScalar value; 214 PetscTruth roworiented = baij->roworiented; 215 PetscErrorCode ierr; 216 PetscInt i,j,row,col; 217 PetscInt rstart_orig=baij->rstart_bs; 218 PetscInt rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs; 219 PetscInt cend_orig=baij->cend_bs,bs=mat->bs; 220 221 /* Some Variables required in the macro */ 222 Mat A = baij->A; 223 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data; 224 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 225 MatScalar *aa=a->a; 226 227 Mat B = baij->B; 228 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 229 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 230 MatScalar *ba=b->a; 231 232 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 233 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 234 MatScalar *ap,*bap; 235 236 /* for stash */ 237 PetscInt n_loc, *in_loc=0; 238 MatScalar *v_loc=0; 239 240 PetscFunctionBegin; 241 242 if(!baij->donotstash){ 243 ierr = PetscMalloc(n*sizeof(PetscInt),&in_loc);CHKERRQ(ierr); 244 ierr = PetscMalloc(n*sizeof(MatScalar),&v_loc);CHKERRQ(ierr); 245 } 246 247 for (i=0; i<m; i++) { 248 if (im[i] < 0) continue; 249 #if defined(PETSC_USE_DEBUG) 250 if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1); 251 #endif 252 if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */ 253 row = im[i] - rstart_orig; /* local row index */ 254 for (j=0; j<n; j++) { 255 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 256 if (in[j] >= cstart_orig && in[j] < cend_orig){ /* diag entry (A) */ 257 col = in[j] - cstart_orig; /* local col index */ 258 brow = row/bs; bcol = col/bs; 259 if (brow > bcol) continue; /* ignore lower triangular blocks of A */ 260 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 261 MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv); 262 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 263 } else if (in[j] < 0) continue; 264 #if defined(PETSC_USE_DEBUG) 265 else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);} 266 #endif 267 else { /* off-diag entry (B) */ 268 if (mat->was_assembled) { 269 if (!baij->colmap) { 270 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 271 } 272 #if defined (PETSC_USE_CTABLE) 273 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 274 col = col - 1; 275 #else 276 col = baij->colmap[in[j]/bs] - 1; 277 #endif 278 if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) { 279 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 280 col = in[j]; 281 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 282 B = baij->B; 283 b = (Mat_SeqBAIJ*)(B)->data; 284 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 285 ba=b->a; 286 } else col += in[j]%bs; 287 } else col = in[j]; 288 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 289 MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv); 290 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 291 } 292 } 293 } else { /* off processor entry */ 294 if (!baij->donotstash) { 295 n_loc = 0; 296 for (j=0; j<n; j++){ 297 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 298 in_loc[n_loc] = in[j]; 299 if (roworiented) { 300 v_loc[n_loc] = v[i*n+j]; 301 } else { 302 v_loc[n_loc] = v[j*m+i]; 303 } 304 n_loc++; 305 } 306 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr); 307 } 308 } 309 } 310 311 if(!baij->donotstash){ 312 ierr = PetscFree(in_loc);CHKERRQ(ierr); 313 ierr = PetscFree(v_loc);CHKERRQ(ierr); 314 } 315 PetscFunctionReturn(0); 316 } 317 318 #undef __FUNCT__ 319 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_MatScalar" 320 PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 321 { 322 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 323 const MatScalar *value; 324 MatScalar *barray=baij->barray; 325 PetscTruth roworiented = baij->roworiented; 326 PetscErrorCode ierr; 327 PetscInt i,j,ii,jj,row,col,rstart=baij->rstart; 328 PetscInt rend=baij->rend,cstart=baij->cstart,stepval; 329 PetscInt cend=baij->cend,bs=mat->bs,bs2=baij->bs2; 330 331 PetscFunctionBegin; 332 if(!barray) { 333 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 334 baij->barray = barray; 335 } 336 337 if (roworiented) { 338 stepval = (n-1)*bs; 339 } else { 340 stepval = (m-1)*bs; 341 } 342 for (i=0; i<m; i++) { 343 if (im[i] < 0) continue; 344 #if defined(PETSC_USE_DEBUG) 345 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 346 #endif 347 if (im[i] >= rstart && im[i] < rend) { 348 row = im[i] - rstart; 349 for (j=0; j<n; j++) { 350 /* If NumCol = 1 then a copy is not required */ 351 if ((roworiented) && (n == 1)) { 352 barray = (MatScalar*) v + i*bs2; 353 } else if((!roworiented) && (m == 1)) { 354 barray = (MatScalar*) v + j*bs2; 355 } else { /* Here a copy is required */ 356 if (roworiented) { 357 value = v + i*(stepval+bs)*bs + j*bs; 358 } else { 359 value = v + j*(stepval+bs)*bs + i*bs; 360 } 361 for (ii=0; ii<bs; ii++,value+=stepval) { 362 for (jj=0; jj<bs; jj++) { 363 *barray++ = *value++; 364 } 365 } 366 barray -=bs2; 367 } 368 369 if (in[j] >= cstart && in[j] < cend){ 370 col = in[j] - cstart; 371 ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 372 } 373 else if (in[j] < 0) continue; 374 #if defined(PETSC_USE_DEBUG) 375 else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);} 376 #endif 377 else { 378 if (mat->was_assembled) { 379 if (!baij->colmap) { 380 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 381 } 382 383 #if defined(PETSC_USE_DEBUG) 384 #if defined (PETSC_USE_CTABLE) 385 { PetscInt data; 386 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 387 if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 388 } 389 #else 390 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 391 #endif 392 #endif 393 #if defined (PETSC_USE_CTABLE) 394 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 395 col = (col - 1)/bs; 396 #else 397 col = (baij->colmap[in[j]] - 1)/bs; 398 #endif 399 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 400 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 401 col = in[j]; 402 } 403 } 404 else col = in[j]; 405 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 406 } 407 } 408 } else { 409 if (!baij->donotstash) { 410 if (roworiented) { 411 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 412 } else { 413 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 414 } 415 } 416 } 417 } 418 PetscFunctionReturn(0); 419 } 420 421 #undef __FUNCT__ 422 #define __FUNCT__ "MatGetValues_MPISBAIJ" 423 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 424 { 425 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 426 PetscErrorCode ierr; 427 PetscInt bs=mat->bs,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs; 428 PetscInt bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data; 429 430 PetscFunctionBegin; 431 for (i=0; i<m; i++) { 432 if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); 433 if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1); 434 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 435 row = idxm[i] - bsrstart; 436 for (j=0; j<n; j++) { 437 if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); 438 if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1); 439 if (idxn[j] >= bscstart && idxn[j] < bscend){ 440 col = idxn[j] - bscstart; 441 ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 442 } else { 443 if (!baij->colmap) { 444 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 445 } 446 #if defined (PETSC_USE_CTABLE) 447 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 448 data --; 449 #else 450 data = baij->colmap[idxn[j]/bs]-1; 451 #endif 452 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 453 else { 454 col = data + idxn[j]%bs; 455 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 456 } 457 } 458 } 459 } else { 460 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 461 } 462 } 463 PetscFunctionReturn(0); 464 } 465 466 #undef __FUNCT__ 467 #define __FUNCT__ "MatNorm_MPISBAIJ" 468 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm) 469 { 470 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 471 PetscErrorCode ierr; 472 PetscReal sum[2],*lnorm2; 473 474 PetscFunctionBegin; 475 if (baij->size == 1) { 476 ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr); 477 } else { 478 if (type == NORM_FROBENIUS) { 479 ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr); 480 ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr); 481 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++; /* squar power of norm(A) */ 482 ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr); 483 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--; /* squar power of norm(B) */ 484 ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 485 *norm = sqrt(sum[0] + 2*sum[1]); 486 ierr = PetscFree(lnorm2);CHKERRQ(ierr); 487 } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */ 488 Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data; 489 Mat_SeqBAIJ *bmat=(Mat_SeqBAIJ*)baij->B->data; 490 PetscReal *rsum,*rsum2,vabs; 491 PetscInt *jj,*garray=baij->garray,rstart=baij->rstart,nz; 492 PetscInt brow,bcol,col,bs=baij->A->bs,row,grow,gcol,mbs=amat->mbs; 493 MatScalar *v; 494 495 ierr = PetscMalloc((2*mat->N+1)*sizeof(PetscReal),&rsum);CHKERRQ(ierr); 496 rsum2 = rsum + mat->N; 497 ierr = PetscMemzero(rsum,mat->N*sizeof(PetscReal));CHKERRQ(ierr); 498 /* Amat */ 499 v = amat->a; jj = amat->j; 500 for (brow=0; brow<mbs; brow++) { 501 grow = bs*(rstart + brow); 502 nz = amat->i[brow+1] - amat->i[brow]; 503 for (bcol=0; bcol<nz; bcol++){ 504 gcol = bs*(rstart + *jj); jj++; 505 for (col=0; col<bs; col++){ 506 for (row=0; row<bs; row++){ 507 vabs = PetscAbsScalar(*v); v++; 508 rsum[gcol+col] += vabs; 509 /* non-diagonal block */ 510 if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs; 511 } 512 } 513 } 514 } 515 /* Bmat */ 516 v = bmat->a; jj = bmat->j; 517 for (brow=0; brow<mbs; brow++) { 518 grow = bs*(rstart + brow); 519 nz = bmat->i[brow+1] - bmat->i[brow]; 520 for (bcol=0; bcol<nz; bcol++){ 521 gcol = bs*garray[*jj]; jj++; 522 for (col=0; col<bs; col++){ 523 for (row=0; row<bs; row++){ 524 vabs = PetscAbsScalar(*v); v++; 525 rsum[gcol+col] += vabs; 526 rsum[grow+row] += vabs; 527 } 528 } 529 } 530 } 531 ierr = MPI_Allreduce(rsum,rsum2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 532 *norm = 0.0; 533 for (col=0; col<mat->N; col++) { 534 if (rsum2[col] > *norm) *norm = rsum2[col]; 535 } 536 ierr = PetscFree(rsum);CHKERRQ(ierr); 537 } else { 538 SETERRQ(PETSC_ERR_SUP,"No support for this norm yet"); 539 } 540 } 541 PetscFunctionReturn(0); 542 } 543 544 #undef __FUNCT__ 545 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ" 546 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode) 547 { 548 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 549 PetscErrorCode ierr; 550 PetscInt nstash,reallocs; 551 InsertMode addv; 552 553 PetscFunctionBegin; 554 if (baij->donotstash) { 555 PetscFunctionReturn(0); 556 } 557 558 /* make sure all processors are either in INSERTMODE or ADDMODE */ 559 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 560 if (addv == (ADD_VALUES|INSERT_VALUES)) { 561 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 562 } 563 mat->insertmode = addv; /* in case this processor had no cache */ 564 565 ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr); 566 ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr); 567 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 568 ierr = PetscLogInfo((0,"MatAssemblyBegin_MPISBAIJ:Stash has %D entries,uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr); 569 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 570 ierr = PetscLogInfo((0,"MatAssemblyBegin_MPISBAIJ:Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr); 571 PetscFunctionReturn(0); 572 } 573 574 #undef __FUNCT__ 575 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ" 576 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode) 577 { 578 Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data; 579 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)baij->A->data; 580 Mat_SeqBAIJ *b=(Mat_SeqBAIJ*)baij->B->data; 581 PetscErrorCode ierr; 582 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 583 PetscInt *row,*col,other_disassembled; 584 PetscMPIInt n; 585 PetscTruth r1,r2,r3; 586 MatScalar *val; 587 InsertMode addv = mat->insertmode; 588 589 PetscFunctionBegin; 590 591 if (!baij->donotstash) { 592 while (1) { 593 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 594 if (!flg) break; 595 596 for (i=0; i<n;) { 597 /* Now identify the consecutive vals belonging to the same row */ 598 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 599 if (j < n) ncols = j-i; 600 else ncols = n-i; 601 /* Now assemble all these values with a single function call */ 602 ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 603 i = j; 604 } 605 } 606 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 607 /* Now process the block-stash. Since the values are stashed column-oriented, 608 set the roworiented flag to column oriented, and after MatSetValues() 609 restore the original flags */ 610 r1 = baij->roworiented; 611 r2 = a->roworiented; 612 r3 = b->roworiented; 613 baij->roworiented = PETSC_FALSE; 614 a->roworiented = PETSC_FALSE; 615 b->roworiented = PETSC_FALSE; 616 while (1) { 617 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 618 if (!flg) break; 619 620 for (i=0; i<n;) { 621 /* Now identify the consecutive vals belonging to the same row */ 622 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 623 if (j < n) ncols = j-i; 624 else ncols = n-i; 625 ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 626 i = j; 627 } 628 } 629 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 630 baij->roworiented = r1; 631 a->roworiented = r2; 632 b->roworiented = r3; 633 } 634 635 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 636 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 637 638 /* determine if any processor has disassembled, if so we must 639 also disassemble ourselfs, in order that we may reassemble. */ 640 /* 641 if nonzero structure of submatrix B cannot change then we know that 642 no processor disassembled thus we can skip this stuff 643 */ 644 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 645 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 646 if (mat->was_assembled && !other_disassembled) { 647 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 648 } 649 } 650 651 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 652 ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */ 653 } 654 b->compressedrow.use = PETSC_TRUE; 655 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 656 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 657 658 if (baij->rowvalues) { 659 ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr); 660 baij->rowvalues = 0; 661 } 662 663 PetscFunctionReturn(0); 664 } 665 666 #undef __FUNCT__ 667 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket" 668 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 669 { 670 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 671 PetscErrorCode ierr; 672 PetscInt bs = mat->bs; 673 PetscMPIInt size = baij->size,rank = baij->rank; 674 PetscTruth iascii,isdraw; 675 PetscViewer sviewer; 676 PetscViewerFormat format; 677 678 PetscFunctionBegin; 679 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 680 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 681 if (iascii) { 682 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 683 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 684 MatInfo info; 685 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 686 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 687 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n", 688 rank,mat->m,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs, 689 mat->bs,(PetscInt)info.memory);CHKERRQ(ierr); 690 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 691 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr); 692 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 693 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr); 694 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 695 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 696 PetscFunctionReturn(0); 697 } else if (format == PETSC_VIEWER_ASCII_INFO) { 698 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 699 PetscFunctionReturn(0); 700 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 701 PetscFunctionReturn(0); 702 } 703 } 704 705 if (isdraw) { 706 PetscDraw draw; 707 PetscTruth isnull; 708 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 709 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 710 } 711 712 if (size == 1) { 713 ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr); 714 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 715 } else { 716 /* assemble the entire matrix onto first processor. */ 717 Mat A; 718 Mat_SeqSBAIJ *Aloc; 719 Mat_SeqBAIJ *Bloc; 720 PetscInt M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 721 MatScalar *a; 722 723 /* Should this be the same type as mat? */ 724 ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr); 725 if (!rank) { 726 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 727 } else { 728 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 729 } 730 ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 731 ierr = MatMPISBAIJSetPreallocation(A,mat->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 732 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 733 734 /* copy over the A part */ 735 Aloc = (Mat_SeqSBAIJ*)baij->A->data; 736 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 737 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 738 739 for (i=0; i<mbs; i++) { 740 rvals[0] = bs*(baij->rstart + i); 741 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 742 for (j=ai[i]; j<ai[i+1]; j++) { 743 col = (baij->cstart+aj[j])*bs; 744 for (k=0; k<bs; k++) { 745 ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 746 col++; a += bs; 747 } 748 } 749 } 750 /* copy over the B part */ 751 Bloc = (Mat_SeqBAIJ*)baij->B->data; 752 ai = Bloc->i; aj = Bloc->j; a = Bloc->a; 753 for (i=0; i<mbs; i++) { 754 rvals[0] = bs*(baij->rstart + i); 755 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 756 for (j=ai[i]; j<ai[i+1]; j++) { 757 col = baij->garray[aj[j]]*bs; 758 for (k=0; k<bs; k++) { 759 ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 760 col++; a += bs; 761 } 762 } 763 } 764 ierr = PetscFree(rvals);CHKERRQ(ierr); 765 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 766 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 767 /* 768 Everyone has to call to draw the matrix since the graphics waits are 769 synchronized across all processors that share the PetscDraw object 770 */ 771 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 772 if (!rank) { 773 ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 774 ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 775 } 776 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 777 ierr = MatDestroy(A);CHKERRQ(ierr); 778 } 779 PetscFunctionReturn(0); 780 } 781 782 #undef __FUNCT__ 783 #define __FUNCT__ "MatView_MPISBAIJ" 784 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer) 785 { 786 PetscErrorCode ierr; 787 PetscTruth iascii,isdraw,issocket,isbinary; 788 789 PetscFunctionBegin; 790 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 791 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 792 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 793 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 794 if (iascii || isdraw || issocket || isbinary) { 795 ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 796 } else { 797 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name); 798 } 799 PetscFunctionReturn(0); 800 } 801 802 #undef __FUNCT__ 803 #define __FUNCT__ "MatDestroy_MPISBAIJ" 804 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat) 805 { 806 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 807 PetscErrorCode ierr; 808 809 PetscFunctionBegin; 810 #if defined(PETSC_USE_LOG) 811 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->M,mat->N); 812 #endif 813 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 814 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 815 ierr = PetscFree(baij->rowners);CHKERRQ(ierr); 816 ierr = MatDestroy(baij->A);CHKERRQ(ierr); 817 ierr = MatDestroy(baij->B);CHKERRQ(ierr); 818 #if defined (PETSC_USE_CTABLE) 819 if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);} 820 #else 821 if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);} 822 #endif 823 if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);} 824 if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 825 if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 826 if (baij->slvec0) { 827 ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr); 828 ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr); 829 } 830 if (baij->slvec1) { 831 ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr); 832 ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr); 833 ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr); 834 } 835 if (baij->sMvctx) {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);} 836 if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);} 837 if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);} 838 if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);} 839 #if defined(PETSC_USE_MAT_SINGLE) 840 if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);} 841 #endif 842 ierr = PetscFree(baij);CHKERRQ(ierr); 843 844 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 845 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 846 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 847 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 848 PetscFunctionReturn(0); 849 } 850 851 #undef __FUNCT__ 852 #define __FUNCT__ "MatMult_MPISBAIJ" 853 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy) 854 { 855 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 856 PetscErrorCode ierr; 857 PetscInt nt,mbs=a->mbs,bs=A->bs; 858 PetscScalar *x,*from,zero=0.0; 859 860 PetscFunctionBegin; 861 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 862 if (nt != A->n) { 863 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 864 } 865 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 866 if (nt != A->m) { 867 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 868 } 869 870 /* diagonal part */ 871 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 872 ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr); 873 874 /* subdiagonal part */ 875 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 876 877 /* copy x into the vec slvec0 */ 878 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 879 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 880 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 881 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 882 883 ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr); 884 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 885 ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr); 886 887 /* supperdiagonal part */ 888 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 889 890 PetscFunctionReturn(0); 891 } 892 893 #undef __FUNCT__ 894 #define __FUNCT__ "MatMult_MPISBAIJ_2comm" 895 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy) 896 { 897 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 898 PetscErrorCode ierr; 899 PetscInt nt; 900 901 PetscFunctionBegin; 902 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 903 if (nt != A->n) { 904 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 905 } 906 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 907 if (nt != A->m) { 908 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 909 } 910 911 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 912 /* do diagonal part */ 913 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 914 /* do supperdiagonal part */ 915 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 916 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 917 /* do subdiagonal part */ 918 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 919 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 920 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 921 922 PetscFunctionReturn(0); 923 } 924 925 #undef __FUNCT__ 926 #define __FUNCT__ "MatMultAdd_MPISBAIJ" 927 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 928 { 929 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 930 PetscErrorCode ierr; 931 PetscInt mbs=a->mbs,bs=A->bs; 932 PetscScalar *x,*from,zero=0.0; 933 934 PetscFunctionBegin; 935 /* 936 PetscSynchronizedPrintf(A->comm," MatMultAdd is called ...\n"); 937 PetscSynchronizedFlush(A->comm); 938 */ 939 /* diagonal part */ 940 ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr); 941 ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr); 942 943 /* subdiagonal part */ 944 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 945 946 /* copy x into the vec slvec0 */ 947 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 948 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 949 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 950 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 951 952 ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr); 953 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 954 ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr); 955 956 /* supperdiagonal part */ 957 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr); 958 959 PetscFunctionReturn(0); 960 } 961 962 #undef __FUNCT__ 963 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm" 964 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz) 965 { 966 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 967 PetscErrorCode ierr; 968 969 PetscFunctionBegin; 970 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 971 /* do diagonal part */ 972 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 973 /* do supperdiagonal part */ 974 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 975 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 976 977 /* do subdiagonal part */ 978 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 979 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 980 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 981 982 PetscFunctionReturn(0); 983 } 984 985 /* 986 This only works correctly for square matrices where the subblock A->A is the 987 diagonal block 988 */ 989 #undef __FUNCT__ 990 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ" 991 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v) 992 { 993 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 994 PetscErrorCode ierr; 995 996 PetscFunctionBegin; 997 /* if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */ 998 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 999 PetscFunctionReturn(0); 1000 } 1001 1002 #undef __FUNCT__ 1003 #define __FUNCT__ "MatScale_MPISBAIJ" 1004 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa) 1005 { 1006 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1007 PetscErrorCode ierr; 1008 1009 PetscFunctionBegin; 1010 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1011 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1012 PetscFunctionReturn(0); 1013 } 1014 1015 #undef __FUNCT__ 1016 #define __FUNCT__ "MatGetRow_MPISBAIJ" 1017 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1018 { 1019 PetscFunctionBegin; 1020 if (matin) SETERRQ(PETSC_ERR_SUP,"MatGetRow is not supported for SBAIJ matrix format"); 1021 PetscFunctionReturn(0); 1022 } 1023 1024 #undef __FUNCT__ 1025 #define __FUNCT__ "MatRestoreRow_MPISBAIJ" 1026 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1027 { 1028 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1029 1030 PetscFunctionBegin; 1031 if (!baij->getrowactive) { 1032 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1033 } 1034 baij->getrowactive = PETSC_FALSE; 1035 PetscFunctionReturn(0); 1036 } 1037 1038 #undef __FUNCT__ 1039 #define __FUNCT__ "MatZeroEntries_MPISBAIJ" 1040 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A) 1041 { 1042 Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data; 1043 PetscErrorCode ierr; 1044 1045 PetscFunctionBegin; 1046 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1047 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1048 PetscFunctionReturn(0); 1049 } 1050 1051 #undef __FUNCT__ 1052 #define __FUNCT__ "MatGetInfo_MPISBAIJ" 1053 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1054 { 1055 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data; 1056 Mat A = a->A,B = a->B; 1057 PetscErrorCode ierr; 1058 PetscReal isend[5],irecv[5]; 1059 1060 PetscFunctionBegin; 1061 info->block_size = (PetscReal)matin->bs; 1062 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1063 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1064 isend[3] = info->memory; isend[4] = info->mallocs; 1065 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1066 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1067 isend[3] += info->memory; isend[4] += info->mallocs; 1068 if (flag == MAT_LOCAL) { 1069 info->nz_used = isend[0]; 1070 info->nz_allocated = isend[1]; 1071 info->nz_unneeded = isend[2]; 1072 info->memory = isend[3]; 1073 info->mallocs = isend[4]; 1074 } else if (flag == MAT_GLOBAL_MAX) { 1075 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1076 info->nz_used = irecv[0]; 1077 info->nz_allocated = irecv[1]; 1078 info->nz_unneeded = irecv[2]; 1079 info->memory = irecv[3]; 1080 info->mallocs = irecv[4]; 1081 } else if (flag == MAT_GLOBAL_SUM) { 1082 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1083 info->nz_used = irecv[0]; 1084 info->nz_allocated = irecv[1]; 1085 info->nz_unneeded = irecv[2]; 1086 info->memory = irecv[3]; 1087 info->mallocs = irecv[4]; 1088 } else { 1089 SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1090 } 1091 info->rows_global = (PetscReal)A->M; 1092 info->columns_global = (PetscReal)A->N; 1093 info->rows_local = (PetscReal)A->m; 1094 info->columns_local = (PetscReal)A->N; 1095 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1096 info->fill_ratio_needed = 0; 1097 info->factor_mallocs = 0; 1098 PetscFunctionReturn(0); 1099 } 1100 1101 #undef __FUNCT__ 1102 #define __FUNCT__ "MatSetOption_MPISBAIJ" 1103 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op) 1104 { 1105 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1106 PetscErrorCode ierr; 1107 1108 PetscFunctionBegin; 1109 switch (op) { 1110 case MAT_NO_NEW_NONZERO_LOCATIONS: 1111 case MAT_YES_NEW_NONZERO_LOCATIONS: 1112 case MAT_COLUMNS_UNSORTED: 1113 case MAT_COLUMNS_SORTED: 1114 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1115 case MAT_KEEP_ZEROED_ROWS: 1116 case MAT_NEW_NONZERO_LOCATION_ERR: 1117 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1118 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1119 break; 1120 case MAT_ROW_ORIENTED: 1121 a->roworiented = PETSC_TRUE; 1122 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1123 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1124 break; 1125 case MAT_ROWS_SORTED: 1126 case MAT_ROWS_UNSORTED: 1127 case MAT_YES_NEW_DIAGONALS: 1128 ierr = PetscLogInfo((A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"));CHKERRQ(ierr); 1129 break; 1130 case MAT_COLUMN_ORIENTED: 1131 a->roworiented = PETSC_FALSE; 1132 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1133 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1134 break; 1135 case MAT_IGNORE_OFF_PROC_ENTRIES: 1136 a->donotstash = PETSC_TRUE; 1137 break; 1138 case MAT_NO_NEW_DIAGONALS: 1139 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1140 case MAT_USE_HASH_TABLE: 1141 a->ht_flag = PETSC_TRUE; 1142 break; 1143 case MAT_NOT_SYMMETRIC: 1144 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 1145 case MAT_HERMITIAN: 1146 SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric"); 1147 case MAT_SYMMETRIC: 1148 case MAT_STRUCTURALLY_SYMMETRIC: 1149 case MAT_NOT_HERMITIAN: 1150 case MAT_SYMMETRY_ETERNAL: 1151 case MAT_NOT_SYMMETRY_ETERNAL: 1152 break; 1153 default: 1154 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1155 } 1156 PetscFunctionReturn(0); 1157 } 1158 1159 #undef __FUNCT__ 1160 #define __FUNCT__ "MatTranspose_MPISBAIJ" 1161 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,Mat *B) 1162 { 1163 PetscErrorCode ierr; 1164 PetscFunctionBegin; 1165 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 1166 PetscFunctionReturn(0); 1167 } 1168 1169 #undef __FUNCT__ 1170 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ" 1171 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr) 1172 { 1173 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1174 Mat a=baij->A, b=baij->B; 1175 PetscErrorCode ierr; 1176 PetscInt nv,m,n; 1177 PetscTruth flg; 1178 1179 PetscFunctionBegin; 1180 if (ll != rr){ 1181 ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr); 1182 if (!flg) 1183 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n"); 1184 } 1185 if (!ll) PetscFunctionReturn(0); 1186 1187 ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr); 1188 if (m != n) SETERRQ2(PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n); 1189 1190 ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr); 1191 if (nv!=n) SETERRQ(PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size"); 1192 1193 ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1194 1195 /* left diagonalscale the off-diagonal part */ 1196 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1197 1198 /* scale the diagonal part */ 1199 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1200 1201 /* right diagonalscale the off-diagonal part */ 1202 ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1203 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1204 PetscFunctionReturn(0); 1205 } 1206 1207 #undef __FUNCT__ 1208 #define __FUNCT__ "MatPrintHelp_MPISBAIJ" 1209 PetscErrorCode MatPrintHelp_MPISBAIJ(Mat A) 1210 { 1211 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1212 MPI_Comm comm = A->comm; 1213 static PetscTruth called = PETSC_FALSE; 1214 PetscErrorCode ierr; 1215 1216 PetscFunctionBegin; 1217 if (!a->rank) { 1218 ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr); 1219 } 1220 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1221 ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr); 1222 ierr = (*PetscHelpPrintf)(comm," -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr); 1223 PetscFunctionReturn(0); 1224 } 1225 1226 #undef __FUNCT__ 1227 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ" 1228 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A) 1229 { 1230 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1231 PetscErrorCode ierr; 1232 1233 PetscFunctionBegin; 1234 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1235 PetscFunctionReturn(0); 1236 } 1237 1238 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *); 1239 1240 #undef __FUNCT__ 1241 #define __FUNCT__ "MatEqual_MPISBAIJ" 1242 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag) 1243 { 1244 Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data; 1245 Mat a,b,c,d; 1246 PetscTruth flg; 1247 PetscErrorCode ierr; 1248 1249 PetscFunctionBegin; 1250 a = matA->A; b = matA->B; 1251 c = matB->A; d = matB->B; 1252 1253 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1254 if (flg) { 1255 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1256 } 1257 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1258 PetscFunctionReturn(0); 1259 } 1260 1261 #undef __FUNCT__ 1262 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ" 1263 PetscErrorCode MatSetUpPreallocation_MPISBAIJ(Mat A) 1264 { 1265 PetscErrorCode ierr; 1266 1267 PetscFunctionBegin; 1268 ierr = MatMPISBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1269 PetscFunctionReturn(0); 1270 } 1271 1272 #undef __FUNCT__ 1273 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ" 1274 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[]) 1275 { 1276 PetscErrorCode ierr; 1277 PetscInt i; 1278 PetscTruth flg; 1279 1280 PetscFunctionBegin; 1281 for (i=0; i<n; i++) { 1282 ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr); 1283 if (!flg) { 1284 SETERRQ(PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices"); 1285 } 1286 } 1287 ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr); 1288 PetscFunctionReturn(0); 1289 } 1290 1291 1292 /* -------------------------------------------------------------------*/ 1293 static struct _MatOps MatOps_Values = { 1294 MatSetValues_MPISBAIJ, 1295 MatGetRow_MPISBAIJ, 1296 MatRestoreRow_MPISBAIJ, 1297 MatMult_MPISBAIJ, 1298 /* 4*/ MatMultAdd_MPISBAIJ, 1299 MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */ 1300 MatMultAdd_MPISBAIJ, 1301 0, 1302 0, 1303 0, 1304 /*10*/ 0, 1305 0, 1306 0, 1307 MatRelax_MPISBAIJ, 1308 MatTranspose_MPISBAIJ, 1309 /*15*/ MatGetInfo_MPISBAIJ, 1310 MatEqual_MPISBAIJ, 1311 MatGetDiagonal_MPISBAIJ, 1312 MatDiagonalScale_MPISBAIJ, 1313 MatNorm_MPISBAIJ, 1314 /*20*/ MatAssemblyBegin_MPISBAIJ, 1315 MatAssemblyEnd_MPISBAIJ, 1316 0, 1317 MatSetOption_MPISBAIJ, 1318 MatZeroEntries_MPISBAIJ, 1319 /*25*/ 0, 1320 0, 1321 0, 1322 0, 1323 0, 1324 /*30*/ MatSetUpPreallocation_MPISBAIJ, 1325 0, 1326 0, 1327 0, 1328 0, 1329 /*35*/ MatDuplicate_MPISBAIJ, 1330 0, 1331 0, 1332 0, 1333 0, 1334 /*40*/ 0, 1335 MatGetSubMatrices_MPISBAIJ, 1336 MatIncreaseOverlap_MPISBAIJ, 1337 MatGetValues_MPISBAIJ, 1338 0, 1339 /*45*/ MatPrintHelp_MPISBAIJ, 1340 MatScale_MPISBAIJ, 1341 0, 1342 0, 1343 0, 1344 /*50*/ 0, 1345 0, 1346 0, 1347 0, 1348 0, 1349 /*55*/ 0, 1350 0, 1351 MatSetUnfactored_MPISBAIJ, 1352 0, 1353 MatSetValuesBlocked_MPISBAIJ, 1354 /*60*/ 0, 1355 0, 1356 0, 1357 MatGetPetscMaps_Petsc, 1358 0, 1359 /*65*/ 0, 1360 0, 1361 0, 1362 0, 1363 0, 1364 /*70*/ MatGetRowMax_MPISBAIJ, 1365 0, 1366 0, 1367 0, 1368 0, 1369 /*75*/ 0, 1370 0, 1371 0, 1372 0, 1373 0, 1374 /*80*/ 0, 1375 0, 1376 0, 1377 0, 1378 MatLoad_MPISBAIJ, 1379 /*85*/ 0, 1380 0, 1381 0, 1382 0, 1383 0, 1384 /*90*/ 0, 1385 0, 1386 0, 1387 0, 1388 0, 1389 /*95*/ 0, 1390 0, 1391 0, 1392 0}; 1393 1394 1395 EXTERN_C_BEGIN 1396 #undef __FUNCT__ 1397 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ" 1398 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 1399 { 1400 PetscFunctionBegin; 1401 *a = ((Mat_MPISBAIJ *)A->data)->A; 1402 *iscopy = PETSC_FALSE; 1403 PetscFunctionReturn(0); 1404 } 1405 EXTERN_C_END 1406 1407 EXTERN_C_BEGIN 1408 #undef __FUNCT__ 1409 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ" 1410 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz) 1411 { 1412 Mat_MPISBAIJ *b; 1413 PetscErrorCode ierr; 1414 PetscInt i,mbs,Mbs; 1415 1416 PetscFunctionBegin; 1417 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1418 1419 if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive"); 1420 if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3; 1421 if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1; 1422 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 1423 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 1424 if (d_nnz) { 1425 for (i=0; i<B->m/bs; i++) { 1426 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 1427 } 1428 } 1429 if (o_nnz) { 1430 for (i=0; i<B->m/bs; i++) { 1431 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 1432 } 1433 } 1434 B->preallocated = PETSC_TRUE; 1435 ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr); 1436 ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr); 1437 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1438 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->cmap);CHKERRQ(ierr); 1439 1440 b = (Mat_MPISBAIJ*)B->data; 1441 mbs = B->m/bs; 1442 Mbs = B->M/bs; 1443 if (mbs*bs != B->m) { 1444 SETERRQ2(PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->m,bs); 1445 } 1446 1447 B->bs = bs; 1448 b->bs2 = bs*bs; 1449 b->mbs = mbs; 1450 b->nbs = mbs; 1451 b->Mbs = Mbs; 1452 b->Nbs = Mbs; 1453 1454 ierr = MPI_Allgather(&b->mbs,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr); 1455 b->rowners[0] = 0; 1456 for (i=2; i<=b->size; i++) { 1457 b->rowners[i] += b->rowners[i-1]; 1458 } 1459 b->rstart = b->rowners[b->rank]; 1460 b->rend = b->rowners[b->rank+1]; 1461 b->cstart = b->rstart; 1462 b->cend = b->rend; 1463 for (i=0; i<=b->size; i++) { 1464 b->rowners_bs[i] = b->rowners[i]*bs; 1465 } 1466 b->rstart_bs = b-> rstart*bs; 1467 b->rend_bs = b->rend*bs; 1468 1469 b->cstart_bs = b->cstart*bs; 1470 b->cend_bs = b->cend*bs; 1471 1472 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 1473 ierr = MatSetSizes(b->A,B->m,B->m,B->m,B->m);CHKERRQ(ierr); 1474 ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr); 1475 ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 1476 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 1477 1478 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 1479 ierr = MatSetSizes(b->B,B->m,B->M,B->m,B->M);CHKERRQ(ierr); 1480 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 1481 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 1482 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 1483 1484 /* build cache for off array entries formed */ 1485 ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr); 1486 1487 PetscFunctionReturn(0); 1488 } 1489 EXTERN_C_END 1490 1491 /*MC 1492 MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices, 1493 based on block compressed sparse row format. Only the upper triangular portion of the matrix is stored. 1494 1495 Options Database Keys: 1496 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions() 1497 1498 Level: beginner 1499 1500 .seealso: MatCreateMPISBAIJ 1501 M*/ 1502 1503 EXTERN_C_BEGIN 1504 #undef __FUNCT__ 1505 #define __FUNCT__ "MatCreate_MPISBAIJ" 1506 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPISBAIJ(Mat B) 1507 { 1508 Mat_MPISBAIJ *b; 1509 PetscErrorCode ierr; 1510 PetscTruth flg; 1511 1512 PetscFunctionBegin; 1513 1514 ierr = PetscNew(Mat_MPISBAIJ,&b);CHKERRQ(ierr); 1515 B->data = (void*)b; 1516 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1517 1518 B->ops->destroy = MatDestroy_MPISBAIJ; 1519 B->ops->view = MatView_MPISBAIJ; 1520 B->mapping = 0; 1521 B->factor = 0; 1522 B->assembled = PETSC_FALSE; 1523 1524 B->insertmode = NOT_SET_VALUES; 1525 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 1526 ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr); 1527 1528 /* build local table of row and column ownerships */ 1529 ierr = PetscMalloc(3*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr); 1530 b->cowners = b->rowners + b->size + 2; 1531 b->rowners_bs = b->cowners + b->size + 2; 1532 ierr = PetscLogObjectMemory(B,3*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ));CHKERRQ(ierr); 1533 1534 /* build cache for off array entries formed */ 1535 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 1536 b->donotstash = PETSC_FALSE; 1537 b->colmap = PETSC_NULL; 1538 b->garray = PETSC_NULL; 1539 b->roworiented = PETSC_TRUE; 1540 1541 #if defined(PETSC_USE_MAT_SINGLE) 1542 /* stuff for MatSetValues_XXX in single precision */ 1543 b->setvalueslen = 0; 1544 b->setvaluescopy = PETSC_NULL; 1545 #endif 1546 1547 /* stuff used in block assembly */ 1548 b->barray = 0; 1549 1550 /* stuff used for matrix vector multiply */ 1551 b->lvec = 0; 1552 b->Mvctx = 0; 1553 b->slvec0 = 0; 1554 b->slvec0b = 0; 1555 b->slvec1 = 0; 1556 b->slvec1a = 0; 1557 b->slvec1b = 0; 1558 b->sMvctx = 0; 1559 1560 /* stuff for MatGetRow() */ 1561 b->rowindices = 0; 1562 b->rowvalues = 0; 1563 b->getrowactive = PETSC_FALSE; 1564 1565 /* hash table stuff */ 1566 b->ht = 0; 1567 b->hd = 0; 1568 b->ht_size = 0; 1569 b->ht_flag = PETSC_FALSE; 1570 b->ht_fact = 0; 1571 b->ht_total_ct = 0; 1572 b->ht_insert_ct = 0; 1573 1574 ierr = PetscOptionsHasName(B->prefix,"-mat_use_hash_table",&flg);CHKERRQ(ierr); 1575 if (flg) { 1576 PetscReal fact = 1.39; 1577 ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr); 1578 ierr = PetscOptionsGetReal(B->prefix,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr); 1579 if (fact <= 1.0) fact = 1.39; 1580 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 1581 ierr = PetscLogInfo((0,"MatCreateMPISBAIJ:Hash table Factor used %5.2f\n",fact));CHKERRQ(ierr); 1582 } 1583 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1584 "MatStoreValues_MPISBAIJ", 1585 MatStoreValues_MPISBAIJ);CHKERRQ(ierr); 1586 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1587 "MatRetrieveValues_MPISBAIJ", 1588 MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr); 1589 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 1590 "MatGetDiagonalBlock_MPISBAIJ", 1591 MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr); 1592 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C", 1593 "MatMPISBAIJSetPreallocation_MPISBAIJ", 1594 MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr); 1595 B->symmetric = PETSC_TRUE; 1596 B->structurally_symmetric = PETSC_TRUE; 1597 B->symmetric_set = PETSC_TRUE; 1598 B->structurally_symmetric_set = PETSC_TRUE; 1599 PetscFunctionReturn(0); 1600 } 1601 EXTERN_C_END 1602 1603 /*MC 1604 MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices. 1605 1606 This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator, 1607 and MATMPISBAIJ otherwise. 1608 1609 Options Database Keys: 1610 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions() 1611 1612 Level: beginner 1613 1614 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ 1615 M*/ 1616 1617 EXTERN_C_BEGIN 1618 #undef __FUNCT__ 1619 #define __FUNCT__ "MatCreate_SBAIJ" 1620 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SBAIJ(Mat A) 1621 { 1622 PetscErrorCode ierr; 1623 PetscMPIInt size; 1624 1625 PetscFunctionBegin; 1626 ierr = PetscObjectChangeTypeName((PetscObject)A,MATSBAIJ);CHKERRQ(ierr); 1627 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 1628 if (size == 1) { 1629 ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr); 1630 } else { 1631 ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 1632 } 1633 PetscFunctionReturn(0); 1634 } 1635 EXTERN_C_END 1636 1637 #undef __FUNCT__ 1638 #define __FUNCT__ "MatMPISBAIJSetPreallocation" 1639 /*@C 1640 MatMPISBAIJSetPreallocation - For good matrix assembly performance 1641 the user should preallocate the matrix storage by setting the parameters 1642 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1643 performance can be increased by more than a factor of 50. 1644 1645 Collective on Mat 1646 1647 Input Parameters: 1648 + A - the matrix 1649 . bs - size of blockk 1650 . d_nz - number of block nonzeros per block row in diagonal portion of local 1651 submatrix (same for all local rows) 1652 . d_nnz - array containing the number of block nonzeros in the various block rows 1653 in the upper triangular and diagonal part of the in diagonal portion of the local 1654 (possibly different for each block row) or PETSC_NULL. You must leave room 1655 for the diagonal entry even if it is zero. 1656 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1657 submatrix (same for all local rows). 1658 - o_nnz - array containing the number of nonzeros in the various block rows of the 1659 off-diagonal portion of the local submatrix (possibly different for 1660 each block row) or PETSC_NULL. 1661 1662 1663 Options Database Keys: 1664 . -mat_no_unroll - uses code that does not unroll the loops in the 1665 block calculations (much slower) 1666 . -mat_block_size - size of the blocks to use 1667 1668 Notes: 1669 1670 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1671 than it must be used on all processors that share the object for that argument. 1672 1673 If the *_nnz parameter is given then the *_nz parameter is ignored 1674 1675 Storage Information: 1676 For a square global matrix we define each processor's diagonal portion 1677 to be its local rows and the corresponding columns (a square submatrix); 1678 each processor's off-diagonal portion encompasses the remainder of the 1679 local matrix (a rectangular submatrix). 1680 1681 The user can specify preallocated storage for the diagonal part of 1682 the local submatrix with either d_nz or d_nnz (not both). Set 1683 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1684 memory allocation. Likewise, specify preallocated storage for the 1685 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1686 1687 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1688 the figure below we depict these three local rows and all columns (0-11). 1689 1690 .vb 1691 0 1 2 3 4 5 6 7 8 9 10 11 1692 ------------------- 1693 row 3 | o o o d d d o o o o o o 1694 row 4 | o o o d d d o o o o o o 1695 row 5 | o o o d d d o o o o o o 1696 ------------------- 1697 .ve 1698 1699 Thus, any entries in the d locations are stored in the d (diagonal) 1700 submatrix, and any entries in the o locations are stored in the 1701 o (off-diagonal) submatrix. Note that the d matrix is stored in 1702 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 1703 1704 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 1705 plus the diagonal part of the d matrix, 1706 and o_nz should indicate the number of block nonzeros per row in the o matrix. 1707 In general, for PDE problems in which most nonzeros are near the diagonal, 1708 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1709 or you will get TERRIBLE performance; see the users' manual chapter on 1710 matrices. 1711 1712 Level: intermediate 1713 1714 .keywords: matrix, block, aij, compressed row, sparse, parallel 1715 1716 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1717 @*/ 1718 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 1719 { 1720 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 1721 1722 PetscFunctionBegin; 1723 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 1724 if (f) { 1725 ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 1726 } 1727 PetscFunctionReturn(0); 1728 } 1729 1730 #undef __FUNCT__ 1731 #define __FUNCT__ "MatCreateMPISBAIJ" 1732 /*@C 1733 MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format 1734 (block compressed row). For good matrix assembly performance 1735 the user should preallocate the matrix storage by setting the parameters 1736 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1737 performance can be increased by more than a factor of 50. 1738 1739 Collective on MPI_Comm 1740 1741 Input Parameters: 1742 + comm - MPI communicator 1743 . bs - size of blockk 1744 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1745 This value should be the same as the local size used in creating the 1746 y vector for the matrix-vector product y = Ax. 1747 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1748 This value should be the same as the local size used in creating the 1749 x vector for the matrix-vector product y = Ax. 1750 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 1751 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 1752 . d_nz - number of block nonzeros per block row in diagonal portion of local 1753 submatrix (same for all local rows) 1754 . d_nnz - array containing the number of block nonzeros in the various block rows 1755 in the upper triangular portion of the in diagonal portion of the local 1756 (possibly different for each block block row) or PETSC_NULL. 1757 You must leave room for the diagonal entry even if it is zero. 1758 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1759 submatrix (same for all local rows). 1760 - o_nnz - array containing the number of nonzeros in the various block rows of the 1761 off-diagonal portion of the local submatrix (possibly different for 1762 each block row) or PETSC_NULL. 1763 1764 Output Parameter: 1765 . A - the matrix 1766 1767 Options Database Keys: 1768 . -mat_no_unroll - uses code that does not unroll the loops in the 1769 block calculations (much slower) 1770 . -mat_block_size - size of the blocks to use 1771 . -mat_mpi - use the parallel matrix data structures even on one processor 1772 (defaults to using SeqBAIJ format on one processor) 1773 1774 Notes: 1775 The number of rows and columns must be divisible by blocksize. 1776 1777 The user MUST specify either the local or global matrix dimensions 1778 (possibly both). 1779 1780 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1781 than it must be used on all processors that share the object for that argument. 1782 1783 If the *_nnz parameter is given then the *_nz parameter is ignored 1784 1785 Storage Information: 1786 For a square global matrix we define each processor's diagonal portion 1787 to be its local rows and the corresponding columns (a square submatrix); 1788 each processor's off-diagonal portion encompasses the remainder of the 1789 local matrix (a rectangular submatrix). 1790 1791 The user can specify preallocated storage for the diagonal part of 1792 the local submatrix with either d_nz or d_nnz (not both). Set 1793 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1794 memory allocation. Likewise, specify preallocated storage for the 1795 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1796 1797 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1798 the figure below we depict these three local rows and all columns (0-11). 1799 1800 .vb 1801 0 1 2 3 4 5 6 7 8 9 10 11 1802 ------------------- 1803 row 3 | o o o d d d o o o o o o 1804 row 4 | o o o d d d o o o o o o 1805 row 5 | o o o d d d o o o o o o 1806 ------------------- 1807 .ve 1808 1809 Thus, any entries in the d locations are stored in the d (diagonal) 1810 submatrix, and any entries in the o locations are stored in the 1811 o (off-diagonal) submatrix. Note that the d matrix is stored in 1812 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 1813 1814 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 1815 plus the diagonal part of the d matrix, 1816 and o_nz should indicate the number of block nonzeros per row in the o matrix. 1817 In general, for PDE problems in which most nonzeros are near the diagonal, 1818 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1819 or you will get TERRIBLE performance; see the users' manual chapter on 1820 matrices. 1821 1822 Level: intermediate 1823 1824 .keywords: matrix, block, aij, compressed row, sparse, parallel 1825 1826 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1827 @*/ 1828 1829 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPISBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 1830 { 1831 PetscErrorCode ierr; 1832 PetscMPIInt size; 1833 1834 PetscFunctionBegin; 1835 ierr = MatCreate(comm,A);CHKERRQ(ierr); 1836 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 1837 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1838 if (size > 1) { 1839 ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr); 1840 ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 1841 } else { 1842 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 1843 ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 1844 } 1845 PetscFunctionReturn(0); 1846 } 1847 1848 1849 #undef __FUNCT__ 1850 #define __FUNCT__ "MatDuplicate_MPISBAIJ" 1851 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1852 { 1853 Mat mat; 1854 Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data; 1855 PetscErrorCode ierr; 1856 PetscInt len=0,nt,bs=matin->bs,mbs=oldmat->mbs; 1857 PetscScalar *array; 1858 1859 PetscFunctionBegin; 1860 *newmat = 0; 1861 ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr); 1862 ierr = MatSetSizes(mat,matin->m,matin->n,matin->M,matin->N);CHKERRQ(ierr); 1863 ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 1864 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1865 1866 mat->factor = matin->factor; 1867 mat->preallocated = PETSC_TRUE; 1868 mat->assembled = PETSC_TRUE; 1869 mat->insertmode = NOT_SET_VALUES; 1870 1871 a = (Mat_MPISBAIJ*)mat->data; 1872 mat->bs = matin->bs; 1873 a->bs2 = oldmat->bs2; 1874 a->mbs = oldmat->mbs; 1875 a->nbs = oldmat->nbs; 1876 a->Mbs = oldmat->Mbs; 1877 a->Nbs = oldmat->Nbs; 1878 1879 a->rstart = oldmat->rstart; 1880 a->rend = oldmat->rend; 1881 a->cstart = oldmat->cstart; 1882 a->cend = oldmat->cend; 1883 a->size = oldmat->size; 1884 a->rank = oldmat->rank; 1885 a->donotstash = oldmat->donotstash; 1886 a->roworiented = oldmat->roworiented; 1887 a->rowindices = 0; 1888 a->rowvalues = 0; 1889 a->getrowactive = PETSC_FALSE; 1890 a->barray = 0; 1891 a->rstart_bs = oldmat->rstart_bs; 1892 a->rend_bs = oldmat->rend_bs; 1893 a->cstart_bs = oldmat->cstart_bs; 1894 a->cend_bs = oldmat->cend_bs; 1895 1896 /* hash table stuff */ 1897 a->ht = 0; 1898 a->hd = 0; 1899 a->ht_size = 0; 1900 a->ht_flag = oldmat->ht_flag; 1901 a->ht_fact = oldmat->ht_fact; 1902 a->ht_total_ct = 0; 1903 a->ht_insert_ct = 0; 1904 1905 ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr); 1906 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1907 ierr = MatStashCreate_Private(matin->comm,matin->bs,&mat->bstash);CHKERRQ(ierr); 1908 if (oldmat->colmap) { 1909 #if defined (PETSC_USE_CTABLE) 1910 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1911 #else 1912 ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 1913 ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 1914 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 1915 #endif 1916 } else a->colmap = 0; 1917 1918 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 1919 ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 1920 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 1921 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 1922 } else a->garray = 0; 1923 1924 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 1925 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 1926 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 1927 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 1928 1929 ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr); 1930 ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr); 1931 ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr); 1932 ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr); 1933 1934 ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr); 1935 ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr); 1936 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr); 1937 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr); 1938 ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr); 1939 ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr); 1940 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr); 1941 ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr); 1942 ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr); 1943 ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr); 1944 ierr = PetscLogObjectParent(mat,a->slvec0b);CHKERRQ(ierr); 1945 ierr = PetscLogObjectParent(mat,a->slvec1a);CHKERRQ(ierr); 1946 ierr = PetscLogObjectParent(mat,a->slvec1b);CHKERRQ(ierr); 1947 1948 /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */ 1949 ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr); 1950 a->sMvctx = oldmat->sMvctx; 1951 ierr = PetscLogObjectParent(mat,a->sMvctx);CHKERRQ(ierr); 1952 1953 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1954 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 1955 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 1956 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 1957 ierr = PetscFListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr); 1958 *newmat = mat; 1959 PetscFunctionReturn(0); 1960 } 1961 1962 #include "petscsys.h" 1963 1964 #undef __FUNCT__ 1965 #define __FUNCT__ "MatLoad_MPISBAIJ" 1966 PetscErrorCode MatLoad_MPISBAIJ(PetscViewer viewer, MatType type,Mat *newmat) 1967 { 1968 Mat A; 1969 PetscErrorCode ierr; 1970 PetscInt i,nz,j,rstart,rend; 1971 PetscScalar *vals,*buf; 1972 MPI_Comm comm = ((PetscObject)viewer)->comm; 1973 MPI_Status status; 1974 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,*locrowlens; 1975 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 1976 PetscInt *procsnz = 0,jj,*mycols,*ibuf; 1977 PetscInt bs=1,Mbs,mbs,extra_rows; 1978 PetscInt *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 1979 PetscInt dcount,kmax,k,nzcount,tmp; 1980 int fd; 1981 1982 PetscFunctionBegin; 1983 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1984 1985 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1986 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1987 if (!rank) { 1988 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1989 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1990 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1991 if (header[3] < 0) { 1992 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ"); 1993 } 1994 } 1995 1996 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 1997 M = header[1]; N = header[2]; 1998 1999 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2000 2001 /* 2002 This code adds extra rows to make sure the number of rows is 2003 divisible by the blocksize 2004 */ 2005 Mbs = M/bs; 2006 extra_rows = bs - M + bs*(Mbs); 2007 if (extra_rows == bs) extra_rows = 0; 2008 else Mbs++; 2009 if (extra_rows &&!rank) { 2010 ierr = PetscLogInfo((0,"MatLoad_MPISBAIJ:Padding loaded matrix to match blocksize\n"));CHKERRQ(ierr); 2011 } 2012 2013 /* determine ownership of all rows */ 2014 mbs = Mbs/size + ((Mbs % size) > rank); 2015 m = mbs*bs; 2016 ierr = PetscMalloc(2*(size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr); 2017 browners = rowners + size + 1; 2018 ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2019 rowners[0] = 0; 2020 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2021 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2022 rstart = rowners[rank]; 2023 rend = rowners[rank+1]; 2024 2025 /* distribute row lengths to all processors */ 2026 ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscMPIInt),&locrowlens);CHKERRQ(ierr); 2027 if (!rank) { 2028 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2029 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2030 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2031 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr); 2032 for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2033 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2034 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2035 } else { 2036 ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2037 } 2038 2039 if (!rank) { /* procs[0] */ 2040 /* calculate the number of nonzeros on each processor */ 2041 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2042 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2043 for (i=0; i<size; i++) { 2044 for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 2045 procsnz[i] += rowlengths[j]; 2046 } 2047 } 2048 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2049 2050 /* determine max buffer needed and allocate it */ 2051 maxnz = 0; 2052 for (i=0; i<size; i++) { 2053 maxnz = PetscMax(maxnz,procsnz[i]); 2054 } 2055 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2056 2057 /* read in my part of the matrix column indices */ 2058 nz = procsnz[0]; 2059 ierr = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 2060 mycols = ibuf; 2061 if (size == 1) nz -= extra_rows; 2062 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2063 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2064 2065 /* read in every ones (except the last) and ship off */ 2066 for (i=1; i<size-1; i++) { 2067 nz = procsnz[i]; 2068 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2069 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2070 } 2071 /* read in the stuff for the last proc */ 2072 if (size != 1) { 2073 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2074 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2075 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2076 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 2077 } 2078 ierr = PetscFree(cols);CHKERRQ(ierr); 2079 } else { /* procs[i], i>0 */ 2080 /* determine buffer space needed for message */ 2081 nz = 0; 2082 for (i=0; i<m; i++) { 2083 nz += locrowlens[i]; 2084 } 2085 ierr = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 2086 mycols = ibuf; 2087 /* receive message of column indices*/ 2088 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2089 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2090 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2091 } 2092 2093 /* loop over local rows, determining number of off diagonal entries */ 2094 ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 2095 odlens = dlens + (rend-rstart); 2096 ierr = PetscMalloc(3*Mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr); 2097 ierr = PetscMemzero(mask,3*Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2098 masked1 = mask + Mbs; 2099 masked2 = masked1 + Mbs; 2100 rowcount = 0; nzcount = 0; 2101 for (i=0; i<mbs; i++) { 2102 dcount = 0; 2103 odcount = 0; 2104 for (j=0; j<bs; j++) { 2105 kmax = locrowlens[rowcount]; 2106 for (k=0; k<kmax; k++) { 2107 tmp = mycols[nzcount++]/bs; /* block col. index */ 2108 if (!mask[tmp]) { 2109 mask[tmp] = 1; 2110 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */ 2111 else masked1[dcount++] = tmp; /* entry in diag portion */ 2112 } 2113 } 2114 rowcount++; 2115 } 2116 2117 dlens[i] = dcount; /* d_nzz[i] */ 2118 odlens[i] = odcount; /* o_nzz[i] */ 2119 2120 /* zero out the mask elements we set */ 2121 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2122 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2123 } 2124 2125 /* create our matrix */ 2126 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 2127 ierr = MatSetSizes(A,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2128 ierr = MatSetType(A,type);CHKERRQ(ierr); 2129 ierr = MatMPISBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr); 2130 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2131 2132 if (!rank) { 2133 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2134 /* read in my part of the matrix numerical values */ 2135 nz = procsnz[0]; 2136 vals = buf; 2137 mycols = ibuf; 2138 if (size == 1) nz -= extra_rows; 2139 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2140 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2141 2142 /* insert into matrix */ 2143 jj = rstart*bs; 2144 for (i=0; i<m; i++) { 2145 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2146 mycols += locrowlens[i]; 2147 vals += locrowlens[i]; 2148 jj++; 2149 } 2150 2151 /* read in other processors (except the last one) and ship out */ 2152 for (i=1; i<size-1; i++) { 2153 nz = procsnz[i]; 2154 vals = buf; 2155 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2156 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2157 } 2158 /* the last proc */ 2159 if (size != 1){ 2160 nz = procsnz[i] - extra_rows; 2161 vals = buf; 2162 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2163 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2164 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 2165 } 2166 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2167 2168 } else { 2169 /* receive numeric values */ 2170 ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2171 2172 /* receive message of values*/ 2173 vals = buf; 2174 mycols = ibuf; 2175 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2176 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2177 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2178 2179 /* insert into matrix */ 2180 jj = rstart*bs; 2181 for (i=0; i<m; i++) { 2182 ierr = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2183 mycols += locrowlens[i]; 2184 vals += locrowlens[i]; 2185 jj++; 2186 } 2187 } 2188 2189 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2190 ierr = PetscFree(buf);CHKERRQ(ierr); 2191 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2192 ierr = PetscFree(rowners);CHKERRQ(ierr); 2193 ierr = PetscFree(dlens);CHKERRQ(ierr); 2194 ierr = PetscFree(mask);CHKERRQ(ierr); 2195 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2196 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2197 *newmat = A; 2198 PetscFunctionReturn(0); 2199 } 2200 2201 #undef __FUNCT__ 2202 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor" 2203 /*XXXXX@ 2204 MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2205 2206 Input Parameters: 2207 . mat - the matrix 2208 . fact - factor 2209 2210 Collective on Mat 2211 2212 Level: advanced 2213 2214 Notes: 2215 This can also be set by the command line option: -mat_use_hash_table fact 2216 2217 .keywords: matrix, hashtable, factor, HT 2218 2219 .seealso: MatSetOption() 2220 @XXXXX*/ 2221 2222 2223 #undef __FUNCT__ 2224 #define __FUNCT__ "MatGetRowMax_MPISBAIJ" 2225 PetscErrorCode MatGetRowMax_MPISBAIJ(Mat A,Vec v) 2226 { 2227 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 2228 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(a->B)->data; 2229 PetscReal atmp; 2230 PetscReal *work,*svalues,*rvalues; 2231 PetscErrorCode ierr; 2232 PetscInt i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol; 2233 PetscMPIInt rank,size; 2234 PetscInt *rowners_bs,dest,count,source; 2235 PetscScalar *va; 2236 MatScalar *ba; 2237 MPI_Status stat; 2238 2239 PetscFunctionBegin; 2240 ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr); 2241 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2242 2243 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 2244 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 2245 2246 bs = A->bs; 2247 mbs = a->mbs; 2248 Mbs = a->Mbs; 2249 ba = b->a; 2250 bi = b->i; 2251 bj = b->j; 2252 2253 /* find ownerships */ 2254 rowners_bs = a->rowners_bs; 2255 2256 /* each proc creates an array to be distributed */ 2257 ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr); 2258 ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr); 2259 2260 /* row_max for B */ 2261 if (rank != size-1){ 2262 for (i=0; i<mbs; i++) { 2263 ncols = bi[1] - bi[0]; bi++; 2264 brow = bs*i; 2265 for (j=0; j<ncols; j++){ 2266 bcol = bs*(*bj); 2267 for (kcol=0; kcol<bs; kcol++){ 2268 col = bcol + kcol; /* local col index */ 2269 col += rowners_bs[rank+1]; /* global col index */ 2270 for (krow=0; krow<bs; krow++){ 2271 atmp = PetscAbsScalar(*ba); ba++; 2272 row = brow + krow; /* local row index */ 2273 if (PetscRealPart(va[row]) < atmp) va[row] = atmp; 2274 if (work[col] < atmp) work[col] = atmp; 2275 } 2276 } 2277 bj++; 2278 } 2279 } 2280 2281 /* send values to its owners */ 2282 for (dest=rank+1; dest<size; dest++){ 2283 svalues = work + rowners_bs[dest]; 2284 count = rowners_bs[dest+1]-rowners_bs[dest]; 2285 ierr = MPI_Send(svalues,count,MPIU_REAL,dest,rank,A->comm);CHKERRQ(ierr); 2286 } 2287 } 2288 2289 /* receive values */ 2290 if (rank){ 2291 rvalues = work; 2292 count = rowners_bs[rank+1]-rowners_bs[rank]; 2293 for (source=0; source<rank; source++){ 2294 ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,A->comm,&stat);CHKERRQ(ierr); 2295 /* process values */ 2296 for (i=0; i<count; i++){ 2297 if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i]; 2298 } 2299 } 2300 } 2301 2302 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2303 ierr = PetscFree(work);CHKERRQ(ierr); 2304 PetscFunctionReturn(0); 2305 } 2306 2307 #undef __FUNCT__ 2308 #define __FUNCT__ "MatRelax_MPISBAIJ" 2309 PetscErrorCode MatRelax_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2310 { 2311 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2312 PetscErrorCode ierr; 2313 PetscInt mbs=mat->mbs,bs=matin->bs; 2314 PetscScalar *x,*b,*ptr,zero=0.0; 2315 Vec bb1; 2316 2317 PetscFunctionBegin; 2318 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 2319 if (bs > 1) 2320 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2321 2322 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2323 if ( flag & SOR_ZERO_INITIAL_GUESS ) { 2324 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2325 its--; 2326 } 2327 2328 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2329 while (its--){ 2330 2331 /* lower triangular part: slvec0b = - B^T*xx */ 2332 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2333 2334 /* copy xx into slvec0a */ 2335 ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2336 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2337 ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2338 ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2339 2340 ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr); 2341 2342 /* copy bb into slvec1a */ 2343 ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2344 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 2345 ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2346 ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2347 2348 /* set slvec1b = 0 */ 2349 ierr = VecSet(mat->slvec1b,zero);CHKERRQ(ierr); 2350 2351 ierr = VecScatterBegin(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr); 2352 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2353 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 2354 ierr = VecScatterEnd(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr); 2355 2356 /* upper triangular part: bb1 = bb1 - B*x */ 2357 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr); 2358 2359 /* local diagonal sweep */ 2360 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2361 } 2362 ierr = VecDestroy(bb1);CHKERRQ(ierr); 2363 } else { 2364 SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2365 } 2366 PetscFunctionReturn(0); 2367 } 2368 2369 #undef __FUNCT__ 2370 #define __FUNCT__ "MatRelax_MPISBAIJ_2comm" 2371 PetscErrorCode MatRelax_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2372 { 2373 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2374 PetscErrorCode ierr; 2375 Vec lvec1,bb1; 2376 2377 PetscFunctionBegin; 2378 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 2379 if (matin->bs > 1) 2380 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2381 2382 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2383 if ( flag & SOR_ZERO_INITIAL_GUESS ) { 2384 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2385 its--; 2386 } 2387 2388 ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr); 2389 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2390 while (its--){ 2391 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 2392 2393 /* lower diagonal part: bb1 = bb - B^T*xx */ 2394 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr); 2395 ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr); 2396 2397 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 2398 ierr = VecCopy(bb,bb1);CHKERRQ(ierr); 2399 ierr = VecScatterBegin(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr); 2400 2401 /* upper diagonal part: bb1 = bb1 - B*x */ 2402 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2403 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 2404 2405 ierr = VecScatterEnd(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr); 2406 2407 /* diagonal sweep */ 2408 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2409 } 2410 ierr = VecDestroy(lvec1);CHKERRQ(ierr); 2411 ierr = VecDestroy(bb1);CHKERRQ(ierr); 2412 } else { 2413 SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2414 } 2415 PetscFunctionReturn(0); 2416 } 2417 2418