1 #define PETSCMAT_DLL 2 3 #include "src/mat/impls/baij/mpi/mpibaij.h" /*I "petscmat.h" I*/ 4 #include "mpisbaij.h" 5 #include "src/mat/impls/sbaij/seq/sbaij.h" 6 7 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat); 8 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat); 9 EXTERN PetscErrorCode DisAssemble_MPISBAIJ(Mat); 10 EXTERN PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt); 11 EXTERN PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []); 12 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []); 13 EXTERN PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode); 14 EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 15 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 16 EXTERN PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**); 17 EXTERN PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**); 18 EXTERN PetscErrorCode MatPrintHelp_SeqSBAIJ(Mat); 19 EXTERN PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*); 20 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *); 21 EXTERN PetscErrorCode MatGetRowMax_MPISBAIJ(Mat,Vec); 22 EXTERN PetscErrorCode MatRelax_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec); 23 24 /* UGLY, ugly, ugly 25 When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does 26 not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and 27 inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ() 28 converts the entries into single precision and then calls ..._MatScalar() to put them 29 into the single precision data structures. 30 */ 31 #if defined(PETSC_USE_MAT_SINGLE) 32 EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode); 33 EXTERN PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode); 34 EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode); 35 EXTERN PetscErrorCode MatSetValues_MPISBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode); 36 EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode); 37 #else 38 #define MatSetValuesBlocked_SeqSBAIJ_MatScalar MatSetValuesBlocked_SeqSBAIJ 39 #define MatSetValues_MPISBAIJ_MatScalar MatSetValues_MPISBAIJ 40 #define MatSetValuesBlocked_MPISBAIJ_MatScalar MatSetValuesBlocked_MPISBAIJ 41 #define MatSetValues_MPISBAIJ_HT_MatScalar MatSetValues_MPISBAIJ_HT 42 #define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar MatSetValuesBlocked_MPISBAIJ_HT 43 #endif 44 45 EXTERN_C_BEGIN 46 #undef __FUNCT__ 47 #define __FUNCT__ "MatStoreValues_MPISBAIJ" 48 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPISBAIJ(Mat mat) 49 { 50 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 51 PetscErrorCode ierr; 52 53 PetscFunctionBegin; 54 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 55 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 56 PetscFunctionReturn(0); 57 } 58 EXTERN_C_END 59 60 EXTERN_C_BEGIN 61 #undef __FUNCT__ 62 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ" 63 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPISBAIJ(Mat mat) 64 { 65 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 66 PetscErrorCode ierr; 67 68 PetscFunctionBegin; 69 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 70 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 71 PetscFunctionReturn(0); 72 } 73 EXTERN_C_END 74 75 76 #define CHUNKSIZE 10 77 78 #define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \ 79 { \ 80 \ 81 brow = row/bs; \ 82 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 83 rmax = aimax[brow]; nrow = ailen[brow]; \ 84 bcol = col/bs; \ 85 ridx = row % bs; cidx = col % bs; \ 86 low = 0; high = nrow; \ 87 while (high-low > 3) { \ 88 t = (low+high)/2; \ 89 if (rp[t] > bcol) high = t; \ 90 else low = t; \ 91 } \ 92 for (_i=low; _i<high; _i++) { \ 93 if (rp[_i] > bcol) break; \ 94 if (rp[_i] == bcol) { \ 95 bap = ap + bs2*_i + bs*cidx + ridx; \ 96 if (addv == ADD_VALUES) *bap += value; \ 97 else *bap = value; \ 98 goto a_noinsert; \ 99 } \ 100 } \ 101 if (a->nonew == 1) goto a_noinsert; \ 102 if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 103 MatSeqXAIJReallocateAIJ(a,bs2,nrow,brow,bcol,rmax,aa,ai,aj,a->mbs,rp,ap,aimax,a->nonew); \ 104 N = nrow++ - 1; \ 105 /* shift up all the later entries in this row */ \ 106 for (ii=N; ii>=_i; ii--) { \ 107 rp[ii+1] = rp[ii]; \ 108 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 109 } \ 110 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 111 rp[_i] = bcol; \ 112 ap[bs2*_i + bs*cidx + ridx] = value; \ 113 a_noinsert:; \ 114 ailen[brow] = nrow; \ 115 } 116 #ifndef MatSetValues_SeqBAIJ_B_Private 117 #define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \ 118 { \ 119 brow = row/bs; \ 120 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 121 rmax = bimax[brow]; nrow = bilen[brow]; \ 122 bcol = col/bs; \ 123 ridx = row % bs; cidx = col % bs; \ 124 low = 0; high = nrow; \ 125 while (high-low > 3) { \ 126 t = (low+high)/2; \ 127 if (rp[t] > bcol) high = t; \ 128 else low = t; \ 129 } \ 130 for (_i=low; _i<high; _i++) { \ 131 if (rp[_i] > bcol) break; \ 132 if (rp[_i] == bcol) { \ 133 bap = ap + bs2*_i + bs*cidx + ridx; \ 134 if (addv == ADD_VALUES) *bap += value; \ 135 else *bap = value; \ 136 goto b_noinsert; \ 137 } \ 138 } \ 139 if (b->nonew == 1) goto b_noinsert; \ 140 if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 141 MatSeqXAIJReallocateAIJ(b,bs2,nrow,brow,bcol,rmax,ba,bi,bj,b->mbs,rp,ap,bimax,b->nonew); \ 142 N = nrow++ - 1; \ 143 /* shift up all the later entries in this row */ \ 144 for (ii=N; ii>=_i; ii--) { \ 145 rp[ii+1] = rp[ii]; \ 146 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 147 } \ 148 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 149 rp[_i] = bcol; \ 150 ap[bs2*_i + bs*cidx + ridx] = value; \ 151 b_noinsert:; \ 152 bilen[brow] = nrow; \ 153 } 154 #endif 155 156 #if defined(PETSC_USE_MAT_SINGLE) 157 #undef __FUNCT__ 158 #define __FUNCT__ "MatSetValues_MPISBAIJ" 159 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 160 { 161 Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data; 162 PetscErrorCode ierr; 163 PetscInt i,N = m*n; 164 MatScalar *vsingle; 165 166 PetscFunctionBegin; 167 if (N > b->setvalueslen) { 168 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 169 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 170 b->setvalueslen = N; 171 } 172 vsingle = b->setvaluescopy; 173 174 for (i=0; i<N; i++) { 175 vsingle[i] = v[i]; 176 } 177 ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 178 PetscFunctionReturn(0); 179 } 180 181 #undef __FUNCT__ 182 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ" 183 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 184 { 185 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 186 PetscErrorCode ierr; 187 PetscInt i,N = m*n*b->bs2; 188 MatScalar *vsingle; 189 190 PetscFunctionBegin; 191 if (N > b->setvalueslen) { 192 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 193 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 194 b->setvalueslen = N; 195 } 196 vsingle = b->setvaluescopy; 197 for (i=0; i<N; i++) { 198 vsingle[i] = v[i]; 199 } 200 ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 201 PetscFunctionReturn(0); 202 } 203 #endif 204 205 /* Only add/insert a(i,j) with i<=j (blocks). 206 Any a(i,j) with i>j input by user is ingored. 207 */ 208 #undef __FUNCT__ 209 #define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar" 210 PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 211 { 212 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 213 MatScalar value; 214 PetscTruth roworiented = baij->roworiented; 215 PetscErrorCode ierr; 216 PetscInt i,j,row,col; 217 PetscInt rstart_orig=baij->rstart_bs; 218 PetscInt rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs; 219 PetscInt cend_orig=baij->cend_bs,bs=mat->bs; 220 221 /* Some Variables required in the macro */ 222 Mat A = baij->A; 223 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data; 224 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 225 MatScalar *aa=a->a; 226 227 Mat B = baij->B; 228 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 229 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 230 MatScalar *ba=b->a; 231 232 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 233 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 234 MatScalar *ap,*bap; 235 236 /* for stash */ 237 PetscInt n_loc, *in_loc=0; 238 MatScalar *v_loc=0; 239 240 PetscFunctionBegin; 241 242 if(!baij->donotstash){ 243 ierr = PetscMalloc(n*sizeof(PetscInt),&in_loc);CHKERRQ(ierr); 244 ierr = PetscMalloc(n*sizeof(MatScalar),&v_loc);CHKERRQ(ierr); 245 } 246 247 for (i=0; i<m; i++) { 248 if (im[i] < 0) continue; 249 #if defined(PETSC_USE_DEBUG) 250 if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1); 251 #endif 252 if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */ 253 row = im[i] - rstart_orig; /* local row index */ 254 for (j=0; j<n; j++) { 255 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 256 if (in[j] >= cstart_orig && in[j] < cend_orig){ /* diag entry (A) */ 257 col = in[j] - cstart_orig; /* local col index */ 258 brow = row/bs; bcol = col/bs; 259 if (brow > bcol) continue; /* ignore lower triangular blocks of A */ 260 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 261 MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv); 262 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 263 } else if (in[j] < 0) continue; 264 #if defined(PETSC_USE_DEBUG) 265 else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);} 266 #endif 267 else { /* off-diag entry (B) */ 268 if (mat->was_assembled) { 269 if (!baij->colmap) { 270 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 271 } 272 #if defined (PETSC_USE_CTABLE) 273 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 274 col = col - 1; 275 #else 276 col = baij->colmap[in[j]/bs] - 1; 277 #endif 278 if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) { 279 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 280 col = in[j]; 281 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 282 B = baij->B; 283 b = (Mat_SeqBAIJ*)(B)->data; 284 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 285 ba=b->a; 286 } else col += in[j]%bs; 287 } else col = in[j]; 288 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 289 MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv); 290 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 291 } 292 } 293 } else { /* off processor entry */ 294 if (!baij->donotstash) { 295 n_loc = 0; 296 for (j=0; j<n; j++){ 297 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 298 in_loc[n_loc] = in[j]; 299 if (roworiented) { 300 v_loc[n_loc] = v[i*n+j]; 301 } else { 302 v_loc[n_loc] = v[j*m+i]; 303 } 304 n_loc++; 305 } 306 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr); 307 } 308 } 309 } 310 311 if(!baij->donotstash){ 312 ierr = PetscFree(in_loc);CHKERRQ(ierr); 313 ierr = PetscFree(v_loc);CHKERRQ(ierr); 314 } 315 PetscFunctionReturn(0); 316 } 317 318 #undef __FUNCT__ 319 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_MatScalar" 320 PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 321 { 322 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 323 const MatScalar *value; 324 MatScalar *barray=baij->barray; 325 PetscTruth roworiented = baij->roworiented; 326 PetscErrorCode ierr; 327 PetscInt i,j,ii,jj,row,col,rstart=baij->rstart; 328 PetscInt rend=baij->rend,cstart=baij->cstart,stepval; 329 PetscInt cend=baij->cend,bs=mat->bs,bs2=baij->bs2; 330 331 PetscFunctionBegin; 332 if(!barray) { 333 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 334 baij->barray = barray; 335 } 336 337 if (roworiented) { 338 stepval = (n-1)*bs; 339 } else { 340 stepval = (m-1)*bs; 341 } 342 for (i=0; i<m; i++) { 343 if (im[i] < 0) continue; 344 #if defined(PETSC_USE_DEBUG) 345 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 346 #endif 347 if (im[i] >= rstart && im[i] < rend) { 348 row = im[i] - rstart; 349 for (j=0; j<n; j++) { 350 /* If NumCol = 1 then a copy is not required */ 351 if ((roworiented) && (n == 1)) { 352 barray = (MatScalar*) v + i*bs2; 353 } else if((!roworiented) && (m == 1)) { 354 barray = (MatScalar*) v + j*bs2; 355 } else { /* Here a copy is required */ 356 if (roworiented) { 357 value = v + i*(stepval+bs)*bs + j*bs; 358 } else { 359 value = v + j*(stepval+bs)*bs + i*bs; 360 } 361 for (ii=0; ii<bs; ii++,value+=stepval) { 362 for (jj=0; jj<bs; jj++) { 363 *barray++ = *value++; 364 } 365 } 366 barray -=bs2; 367 } 368 369 if (in[j] >= cstart && in[j] < cend){ 370 col = in[j] - cstart; 371 ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 372 } 373 else if (in[j] < 0) continue; 374 #if defined(PETSC_USE_DEBUG) 375 else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);} 376 #endif 377 else { 378 if (mat->was_assembled) { 379 if (!baij->colmap) { 380 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 381 } 382 383 #if defined(PETSC_USE_DEBUG) 384 #if defined (PETSC_USE_CTABLE) 385 { PetscInt data; 386 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 387 if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 388 } 389 #else 390 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 391 #endif 392 #endif 393 #if defined (PETSC_USE_CTABLE) 394 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 395 col = (col - 1)/bs; 396 #else 397 col = (baij->colmap[in[j]] - 1)/bs; 398 #endif 399 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 400 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 401 col = in[j]; 402 } 403 } 404 else col = in[j]; 405 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 406 } 407 } 408 } else { 409 if (!baij->donotstash) { 410 if (roworiented) { 411 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 412 } else { 413 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 414 } 415 } 416 } 417 } 418 PetscFunctionReturn(0); 419 } 420 421 #undef __FUNCT__ 422 #define __FUNCT__ "MatGetValues_MPISBAIJ" 423 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 424 { 425 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 426 PetscErrorCode ierr; 427 PetscInt bs=mat->bs,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs; 428 PetscInt bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data; 429 430 PetscFunctionBegin; 431 for (i=0; i<m; i++) { 432 if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); 433 if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1); 434 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 435 row = idxm[i] - bsrstart; 436 for (j=0; j<n; j++) { 437 if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); 438 if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1); 439 if (idxn[j] >= bscstart && idxn[j] < bscend){ 440 col = idxn[j] - bscstart; 441 ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 442 } else { 443 if (!baij->colmap) { 444 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 445 } 446 #if defined (PETSC_USE_CTABLE) 447 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 448 data --; 449 #else 450 data = baij->colmap[idxn[j]/bs]-1; 451 #endif 452 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 453 else { 454 col = data + idxn[j]%bs; 455 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 456 } 457 } 458 } 459 } else { 460 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 461 } 462 } 463 PetscFunctionReturn(0); 464 } 465 466 #undef __FUNCT__ 467 #define __FUNCT__ "MatNorm_MPISBAIJ" 468 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm) 469 { 470 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 471 PetscErrorCode ierr; 472 PetscReal sum[2],*lnorm2; 473 474 PetscFunctionBegin; 475 if (baij->size == 1) { 476 ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr); 477 } else { 478 if (type == NORM_FROBENIUS) { 479 ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr); 480 ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr); 481 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++; /* squar power of norm(A) */ 482 ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr); 483 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--; /* squar power of norm(B) */ 484 ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 485 *norm = sqrt(sum[0] + 2*sum[1]); 486 ierr = PetscFree(lnorm2);CHKERRQ(ierr); 487 } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */ 488 Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data; 489 Mat_SeqBAIJ *bmat=(Mat_SeqBAIJ*)baij->B->data; 490 PetscReal *rsum,*rsum2,vabs; 491 PetscInt *jj,*garray=baij->garray,rstart=baij->rstart,nz; 492 PetscInt brow,bcol,col,bs=baij->A->bs,row,grow,gcol,mbs=amat->mbs; 493 MatScalar *v; 494 495 ierr = PetscMalloc((2*mat->N+1)*sizeof(PetscReal),&rsum);CHKERRQ(ierr); 496 rsum2 = rsum + mat->N; 497 ierr = PetscMemzero(rsum,mat->N*sizeof(PetscReal));CHKERRQ(ierr); 498 /* Amat */ 499 v = amat->a; jj = amat->j; 500 for (brow=0; brow<mbs; brow++) { 501 grow = bs*(rstart + brow); 502 nz = amat->i[brow+1] - amat->i[brow]; 503 for (bcol=0; bcol<nz; bcol++){ 504 gcol = bs*(rstart + *jj); jj++; 505 for (col=0; col<bs; col++){ 506 for (row=0; row<bs; row++){ 507 vabs = PetscAbsScalar(*v); v++; 508 rsum[gcol+col] += vabs; 509 /* non-diagonal block */ 510 if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs; 511 } 512 } 513 } 514 } 515 /* Bmat */ 516 v = bmat->a; jj = bmat->j; 517 for (brow=0; brow<mbs; brow++) { 518 grow = bs*(rstart + brow); 519 nz = bmat->i[brow+1] - bmat->i[brow]; 520 for (bcol=0; bcol<nz; bcol++){ 521 gcol = bs*garray[*jj]; jj++; 522 for (col=0; col<bs; col++){ 523 for (row=0; row<bs; row++){ 524 vabs = PetscAbsScalar(*v); v++; 525 rsum[gcol+col] += vabs; 526 rsum[grow+row] += vabs; 527 } 528 } 529 } 530 } 531 ierr = MPI_Allreduce(rsum,rsum2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 532 *norm = 0.0; 533 for (col=0; col<mat->N; col++) { 534 if (rsum2[col] > *norm) *norm = rsum2[col]; 535 } 536 ierr = PetscFree(rsum);CHKERRQ(ierr); 537 } else { 538 SETERRQ(PETSC_ERR_SUP,"No support for this norm yet"); 539 } 540 } 541 PetscFunctionReturn(0); 542 } 543 544 #undef __FUNCT__ 545 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ" 546 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode) 547 { 548 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 549 PetscErrorCode ierr; 550 PetscInt nstash,reallocs; 551 InsertMode addv; 552 553 PetscFunctionBegin; 554 if (baij->donotstash) { 555 PetscFunctionReturn(0); 556 } 557 558 /* make sure all processors are either in INSERTMODE or ADDMODE */ 559 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 560 if (addv == (ADD_VALUES|INSERT_VALUES)) { 561 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 562 } 563 mat->insertmode = addv; /* in case this processor had no cache */ 564 565 ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr); 566 ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr); 567 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 568 ierr = PetscLogInfo((0,"MatAssemblyBegin_MPISBAIJ:Stash has %D entries,uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr); 569 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 570 ierr = PetscLogInfo((0,"MatAssemblyBegin_MPISBAIJ:Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr); 571 PetscFunctionReturn(0); 572 } 573 574 #undef __FUNCT__ 575 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ" 576 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode) 577 { 578 Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data; 579 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)baij->A->data; 580 Mat_SeqBAIJ *b=(Mat_SeqBAIJ*)baij->B->data; 581 PetscErrorCode ierr; 582 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 583 PetscInt *row,*col,other_disassembled; 584 PetscMPIInt n; 585 PetscTruth r1,r2,r3; 586 MatScalar *val; 587 InsertMode addv = mat->insertmode; 588 589 PetscFunctionBegin; 590 591 if (!baij->donotstash) { 592 while (1) { 593 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 594 if (!flg) break; 595 596 for (i=0; i<n;) { 597 /* Now identify the consecutive vals belonging to the same row */ 598 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 599 if (j < n) ncols = j-i; 600 else ncols = n-i; 601 /* Now assemble all these values with a single function call */ 602 ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 603 i = j; 604 } 605 } 606 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 607 /* Now process the block-stash. Since the values are stashed column-oriented, 608 set the roworiented flag to column oriented, and after MatSetValues() 609 restore the original flags */ 610 r1 = baij->roworiented; 611 r2 = a->roworiented; 612 r3 = b->roworiented; 613 baij->roworiented = PETSC_FALSE; 614 a->roworiented = PETSC_FALSE; 615 b->roworiented = PETSC_FALSE; 616 while (1) { 617 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 618 if (!flg) break; 619 620 for (i=0; i<n;) { 621 /* Now identify the consecutive vals belonging to the same row */ 622 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 623 if (j < n) ncols = j-i; 624 else ncols = n-i; 625 ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 626 i = j; 627 } 628 } 629 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 630 baij->roworiented = r1; 631 a->roworiented = r2; 632 b->roworiented = r3; 633 } 634 635 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 636 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 637 638 /* determine if any processor has disassembled, if so we must 639 also disassemble ourselfs, in order that we may reassemble. */ 640 /* 641 if nonzero structure of submatrix B cannot change then we know that 642 no processor disassembled thus we can skip this stuff 643 */ 644 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 645 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 646 if (mat->was_assembled && !other_disassembled) { 647 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 648 } 649 } 650 651 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 652 ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */ 653 } 654 b->compressedrow.use = PETSC_TRUE; 655 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 656 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 657 658 if (baij->rowvalues) { 659 ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr); 660 baij->rowvalues = 0; 661 } 662 663 PetscFunctionReturn(0); 664 } 665 666 #undef __FUNCT__ 667 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket" 668 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 669 { 670 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 671 PetscErrorCode ierr; 672 PetscInt bs = mat->bs; 673 PetscMPIInt size = baij->size,rank = baij->rank; 674 PetscTruth iascii,isdraw; 675 PetscViewer sviewer; 676 PetscViewerFormat format; 677 678 PetscFunctionBegin; 679 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 680 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 681 if (iascii) { 682 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 683 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 684 MatInfo info; 685 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 686 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 687 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n", 688 rank,mat->m,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs, 689 mat->bs,(PetscInt)info.memory);CHKERRQ(ierr); 690 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 691 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr); 692 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 693 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr); 694 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 695 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 696 PetscFunctionReturn(0); 697 } else if (format == PETSC_VIEWER_ASCII_INFO) { 698 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 699 PetscFunctionReturn(0); 700 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 701 PetscFunctionReturn(0); 702 } 703 } 704 705 if (isdraw) { 706 PetscDraw draw; 707 PetscTruth isnull; 708 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 709 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 710 } 711 712 if (size == 1) { 713 ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr); 714 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 715 } else { 716 /* assemble the entire matrix onto first processor. */ 717 Mat A; 718 Mat_SeqSBAIJ *Aloc; 719 Mat_SeqBAIJ *Bloc; 720 PetscInt M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 721 MatScalar *a; 722 723 /* Should this be the same type as mat? */ 724 ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr); 725 if (!rank) { 726 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 727 } else { 728 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 729 } 730 ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 731 ierr = MatMPISBAIJSetPreallocation(A,mat->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 732 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 733 734 /* copy over the A part */ 735 Aloc = (Mat_SeqSBAIJ*)baij->A->data; 736 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 737 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 738 739 for (i=0; i<mbs; i++) { 740 rvals[0] = bs*(baij->rstart + i); 741 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 742 for (j=ai[i]; j<ai[i+1]; j++) { 743 col = (baij->cstart+aj[j])*bs; 744 for (k=0; k<bs; k++) { 745 ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 746 col++; a += bs; 747 } 748 } 749 } 750 /* copy over the B part */ 751 Bloc = (Mat_SeqBAIJ*)baij->B->data; 752 ai = Bloc->i; aj = Bloc->j; a = Bloc->a; 753 for (i=0; i<mbs; i++) { 754 rvals[0] = bs*(baij->rstart + i); 755 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 756 for (j=ai[i]; j<ai[i+1]; j++) { 757 col = baij->garray[aj[j]]*bs; 758 for (k=0; k<bs; k++) { 759 ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 760 col++; a += bs; 761 } 762 } 763 } 764 ierr = PetscFree(rvals);CHKERRQ(ierr); 765 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 766 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 767 /* 768 Everyone has to call to draw the matrix since the graphics waits are 769 synchronized across all processors that share the PetscDraw object 770 */ 771 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 772 if (!rank) { 773 ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 774 ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 775 } 776 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 777 ierr = MatDestroy(A);CHKERRQ(ierr); 778 } 779 PetscFunctionReturn(0); 780 } 781 782 #undef __FUNCT__ 783 #define __FUNCT__ "MatView_MPISBAIJ" 784 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer) 785 { 786 PetscErrorCode ierr; 787 PetscTruth iascii,isdraw,issocket,isbinary; 788 789 PetscFunctionBegin; 790 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 791 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 792 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 793 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 794 if (iascii || isdraw || issocket || isbinary) { 795 ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 796 } else { 797 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name); 798 } 799 PetscFunctionReturn(0); 800 } 801 802 #undef __FUNCT__ 803 #define __FUNCT__ "MatDestroy_MPISBAIJ" 804 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat) 805 { 806 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 807 PetscErrorCode ierr; 808 809 PetscFunctionBegin; 810 #if defined(PETSC_USE_LOG) 811 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->M,mat->N); 812 #endif 813 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 814 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 815 ierr = PetscFree(baij->rowners);CHKERRQ(ierr); 816 ierr = MatDestroy(baij->A);CHKERRQ(ierr); 817 ierr = MatDestroy(baij->B);CHKERRQ(ierr); 818 #if defined (PETSC_USE_CTABLE) 819 if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);} 820 #else 821 if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);} 822 #endif 823 if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);} 824 if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 825 if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 826 if (baij->slvec0) { 827 ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr); 828 ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr); 829 } 830 if (baij->slvec1) { 831 ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr); 832 ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr); 833 ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr); 834 } 835 if (baij->sMvctx) {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);} 836 if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);} 837 if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);} 838 if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);} 839 #if defined(PETSC_USE_MAT_SINGLE) 840 if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);} 841 #endif 842 ierr = PetscFree(baij);CHKERRQ(ierr); 843 844 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 845 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 846 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 847 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 848 PetscFunctionReturn(0); 849 } 850 851 #undef __FUNCT__ 852 #define __FUNCT__ "MatMult_MPISBAIJ" 853 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy) 854 { 855 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 856 PetscErrorCode ierr; 857 PetscInt nt,mbs=a->mbs,bs=A->bs; 858 PetscScalar *x,*from,zero=0.0; 859 860 PetscFunctionBegin; 861 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 862 if (nt != A->n) { 863 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 864 } 865 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 866 if (nt != A->m) { 867 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 868 } 869 870 /* diagonal part */ 871 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 872 ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr); 873 874 /* subdiagonal part */ 875 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 876 877 /* copy x into the vec slvec0 */ 878 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 879 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 880 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 881 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 882 883 ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr); 884 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 885 ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr); 886 887 /* supperdiagonal part */ 888 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 889 890 PetscFunctionReturn(0); 891 } 892 893 #undef __FUNCT__ 894 #define __FUNCT__ "MatMult_MPISBAIJ_2comm" 895 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy) 896 { 897 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 898 PetscErrorCode ierr; 899 PetscInt nt; 900 901 PetscFunctionBegin; 902 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 903 if (nt != A->n) { 904 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 905 } 906 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 907 if (nt != A->m) { 908 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 909 } 910 911 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 912 /* do diagonal part */ 913 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 914 /* do supperdiagonal part */ 915 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 916 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 917 /* do subdiagonal part */ 918 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 919 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 920 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 921 922 PetscFunctionReturn(0); 923 } 924 925 #undef __FUNCT__ 926 #define __FUNCT__ "MatMultAdd_MPISBAIJ" 927 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 928 { 929 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 930 PetscErrorCode ierr; 931 PetscInt mbs=a->mbs,bs=A->bs; 932 PetscScalar *x,*from,zero=0.0; 933 934 PetscFunctionBegin; 935 /* 936 PetscSynchronizedPrintf(A->comm," MatMultAdd is called ...\n"); 937 PetscSynchronizedFlush(A->comm); 938 */ 939 /* diagonal part */ 940 ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr); 941 ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr); 942 943 /* subdiagonal part */ 944 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 945 946 /* copy x into the vec slvec0 */ 947 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 948 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 949 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 950 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 951 952 ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr); 953 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 954 ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr); 955 956 /* supperdiagonal part */ 957 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr); 958 959 PetscFunctionReturn(0); 960 } 961 962 #undef __FUNCT__ 963 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm" 964 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz) 965 { 966 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 967 PetscErrorCode ierr; 968 969 PetscFunctionBegin; 970 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 971 /* do diagonal part */ 972 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 973 /* do supperdiagonal part */ 974 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 975 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 976 977 /* do subdiagonal part */ 978 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 979 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 980 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 981 982 PetscFunctionReturn(0); 983 } 984 985 /* 986 This only works correctly for square matrices where the subblock A->A is the 987 diagonal block 988 */ 989 #undef __FUNCT__ 990 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ" 991 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v) 992 { 993 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 994 PetscErrorCode ierr; 995 996 PetscFunctionBegin; 997 /* if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */ 998 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 999 PetscFunctionReturn(0); 1000 } 1001 1002 #undef __FUNCT__ 1003 #define __FUNCT__ "MatScale_MPISBAIJ" 1004 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa) 1005 { 1006 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1007 PetscErrorCode ierr; 1008 1009 PetscFunctionBegin; 1010 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1011 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1012 PetscFunctionReturn(0); 1013 } 1014 1015 #undef __FUNCT__ 1016 #define __FUNCT__ "MatGetRow_MPISBAIJ" 1017 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1018 { 1019 PetscFunctionBegin; 1020 if (matin) SETERRQ(PETSC_ERR_SUP,"MatGetRow is not supported for SBAIJ matrix format"); 1021 PetscFunctionReturn(0); 1022 } 1023 1024 #undef __FUNCT__ 1025 #define __FUNCT__ "MatRestoreRow_MPISBAIJ" 1026 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1027 { 1028 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1029 1030 PetscFunctionBegin; 1031 if (!baij->getrowactive) { 1032 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1033 } 1034 baij->getrowactive = PETSC_FALSE; 1035 PetscFunctionReturn(0); 1036 } 1037 1038 #undef __FUNCT__ 1039 #define __FUNCT__ "MatRealPart_MPISBAIJ" 1040 PetscErrorCode MatRealPart_MPISBAIJ(Mat A) 1041 { 1042 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1043 PetscErrorCode ierr; 1044 1045 PetscFunctionBegin; 1046 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1047 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1048 PetscFunctionReturn(0); 1049 } 1050 1051 #undef __FUNCT__ 1052 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ" 1053 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A) 1054 { 1055 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1056 PetscErrorCode ierr; 1057 1058 PetscFunctionBegin; 1059 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1060 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1061 PetscFunctionReturn(0); 1062 } 1063 1064 #undef __FUNCT__ 1065 #define __FUNCT__ "MatZeroEntries_MPISBAIJ" 1066 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A) 1067 { 1068 Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data; 1069 PetscErrorCode ierr; 1070 1071 PetscFunctionBegin; 1072 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1073 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1074 PetscFunctionReturn(0); 1075 } 1076 1077 #undef __FUNCT__ 1078 #define __FUNCT__ "MatGetInfo_MPISBAIJ" 1079 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1080 { 1081 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data; 1082 Mat A = a->A,B = a->B; 1083 PetscErrorCode ierr; 1084 PetscReal isend[5],irecv[5]; 1085 1086 PetscFunctionBegin; 1087 info->block_size = (PetscReal)matin->bs; 1088 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1089 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1090 isend[3] = info->memory; isend[4] = info->mallocs; 1091 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1092 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1093 isend[3] += info->memory; isend[4] += info->mallocs; 1094 if (flag == MAT_LOCAL) { 1095 info->nz_used = isend[0]; 1096 info->nz_allocated = isend[1]; 1097 info->nz_unneeded = isend[2]; 1098 info->memory = isend[3]; 1099 info->mallocs = isend[4]; 1100 } else if (flag == MAT_GLOBAL_MAX) { 1101 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1102 info->nz_used = irecv[0]; 1103 info->nz_allocated = irecv[1]; 1104 info->nz_unneeded = irecv[2]; 1105 info->memory = irecv[3]; 1106 info->mallocs = irecv[4]; 1107 } else if (flag == MAT_GLOBAL_SUM) { 1108 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1109 info->nz_used = irecv[0]; 1110 info->nz_allocated = irecv[1]; 1111 info->nz_unneeded = irecv[2]; 1112 info->memory = irecv[3]; 1113 info->mallocs = irecv[4]; 1114 } else { 1115 SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1116 } 1117 info->rows_global = (PetscReal)A->M; 1118 info->columns_global = (PetscReal)A->N; 1119 info->rows_local = (PetscReal)A->m; 1120 info->columns_local = (PetscReal)A->N; 1121 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1122 info->fill_ratio_needed = 0; 1123 info->factor_mallocs = 0; 1124 PetscFunctionReturn(0); 1125 } 1126 1127 #undef __FUNCT__ 1128 #define __FUNCT__ "MatSetOption_MPISBAIJ" 1129 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op) 1130 { 1131 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1132 PetscErrorCode ierr; 1133 1134 PetscFunctionBegin; 1135 switch (op) { 1136 case MAT_NO_NEW_NONZERO_LOCATIONS: 1137 case MAT_YES_NEW_NONZERO_LOCATIONS: 1138 case MAT_COLUMNS_UNSORTED: 1139 case MAT_COLUMNS_SORTED: 1140 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1141 case MAT_KEEP_ZEROED_ROWS: 1142 case MAT_NEW_NONZERO_LOCATION_ERR: 1143 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1144 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1145 break; 1146 case MAT_ROW_ORIENTED: 1147 a->roworiented = PETSC_TRUE; 1148 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1149 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1150 break; 1151 case MAT_ROWS_SORTED: 1152 case MAT_ROWS_UNSORTED: 1153 case MAT_YES_NEW_DIAGONALS: 1154 ierr = PetscLogInfo((A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"));CHKERRQ(ierr); 1155 break; 1156 case MAT_COLUMN_ORIENTED: 1157 a->roworiented = PETSC_FALSE; 1158 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1159 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1160 break; 1161 case MAT_IGNORE_OFF_PROC_ENTRIES: 1162 a->donotstash = PETSC_TRUE; 1163 break; 1164 case MAT_NO_NEW_DIAGONALS: 1165 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1166 case MAT_USE_HASH_TABLE: 1167 a->ht_flag = PETSC_TRUE; 1168 break; 1169 case MAT_NOT_SYMMETRIC: 1170 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 1171 case MAT_HERMITIAN: 1172 SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric"); 1173 case MAT_SYMMETRIC: 1174 case MAT_STRUCTURALLY_SYMMETRIC: 1175 case MAT_NOT_HERMITIAN: 1176 case MAT_SYMMETRY_ETERNAL: 1177 case MAT_NOT_SYMMETRY_ETERNAL: 1178 break; 1179 default: 1180 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1181 } 1182 PetscFunctionReturn(0); 1183 } 1184 1185 #undef __FUNCT__ 1186 #define __FUNCT__ "MatTranspose_MPISBAIJ" 1187 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,Mat *B) 1188 { 1189 PetscErrorCode ierr; 1190 PetscFunctionBegin; 1191 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 1192 PetscFunctionReturn(0); 1193 } 1194 1195 #undef __FUNCT__ 1196 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ" 1197 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr) 1198 { 1199 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1200 Mat a=baij->A, b=baij->B; 1201 PetscErrorCode ierr; 1202 PetscInt nv,m,n; 1203 PetscTruth flg; 1204 1205 PetscFunctionBegin; 1206 if (ll != rr){ 1207 ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr); 1208 if (!flg) 1209 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n"); 1210 } 1211 if (!ll) PetscFunctionReturn(0); 1212 1213 ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr); 1214 if (m != n) SETERRQ2(PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n); 1215 1216 ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr); 1217 if (nv!=n) SETERRQ(PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size"); 1218 1219 ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1220 1221 /* left diagonalscale the off-diagonal part */ 1222 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1223 1224 /* scale the diagonal part */ 1225 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1226 1227 /* right diagonalscale the off-diagonal part */ 1228 ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1229 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1230 PetscFunctionReturn(0); 1231 } 1232 1233 #undef __FUNCT__ 1234 #define __FUNCT__ "MatPrintHelp_MPISBAIJ" 1235 PetscErrorCode MatPrintHelp_MPISBAIJ(Mat A) 1236 { 1237 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1238 MPI_Comm comm = A->comm; 1239 static PetscTruth called = PETSC_FALSE; 1240 PetscErrorCode ierr; 1241 1242 PetscFunctionBegin; 1243 if (!a->rank) { 1244 ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr); 1245 } 1246 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1247 ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr); 1248 ierr = (*PetscHelpPrintf)(comm," -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr); 1249 PetscFunctionReturn(0); 1250 } 1251 1252 #undef __FUNCT__ 1253 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ" 1254 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A) 1255 { 1256 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1257 PetscErrorCode ierr; 1258 1259 PetscFunctionBegin; 1260 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1261 PetscFunctionReturn(0); 1262 } 1263 1264 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *); 1265 1266 #undef __FUNCT__ 1267 #define __FUNCT__ "MatEqual_MPISBAIJ" 1268 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag) 1269 { 1270 Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data; 1271 Mat a,b,c,d; 1272 PetscTruth flg; 1273 PetscErrorCode ierr; 1274 1275 PetscFunctionBegin; 1276 a = matA->A; b = matA->B; 1277 c = matB->A; d = matB->B; 1278 1279 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1280 if (flg) { 1281 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1282 } 1283 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1284 PetscFunctionReturn(0); 1285 } 1286 1287 #undef __FUNCT__ 1288 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ" 1289 PetscErrorCode MatSetUpPreallocation_MPISBAIJ(Mat A) 1290 { 1291 PetscErrorCode ierr; 1292 1293 PetscFunctionBegin; 1294 ierr = MatMPISBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1295 PetscFunctionReturn(0); 1296 } 1297 1298 #include "petscblaslapack.h" 1299 #undef __FUNCT__ 1300 #define __FUNCT__ "MatAXPY_MPISBAIJ" 1301 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1302 { 1303 PetscErrorCode ierr; 1304 PetscInt i; 1305 Mat_MPISBAIJ *xx=(Mat_MPISBAIJ *)X->data,*yy=(Mat_MPISBAIJ *)Y->data; 1306 PetscBLASInt bnz,one=1; 1307 Mat_SeqSBAIJ *xa,*ya; 1308 Mat_SeqBAIJ *xb,*yb; 1309 1310 PetscFunctionBegin; 1311 if (str == SAME_NONZERO_PATTERN) { 1312 PetscScalar alpha = a; 1313 xa = (Mat_SeqSBAIJ *)xx->A->data; 1314 ya = (Mat_SeqSBAIJ *)yy->A->data; 1315 bnz = (PetscBLASInt)xa->nz; 1316 BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one); 1317 xb = (Mat_SeqBAIJ *)xx->B->data; 1318 yb = (Mat_SeqBAIJ *)yy->B->data; 1319 bnz = (PetscBLASInt)xb->nz; 1320 BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one); 1321 } else { 1322 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1323 } 1324 PetscFunctionReturn(0); 1325 } 1326 1327 #undef __FUNCT__ 1328 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ" 1329 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[]) 1330 { 1331 PetscErrorCode ierr; 1332 PetscInt i; 1333 PetscTruth flg; 1334 1335 PetscFunctionBegin; 1336 for (i=0; i<n; i++) { 1337 ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr); 1338 if (!flg) { 1339 SETERRQ(PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices"); 1340 } 1341 } 1342 ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr); 1343 PetscFunctionReturn(0); 1344 } 1345 1346 1347 /* -------------------------------------------------------------------*/ 1348 static struct _MatOps MatOps_Values = { 1349 MatSetValues_MPISBAIJ, 1350 MatGetRow_MPISBAIJ, 1351 MatRestoreRow_MPISBAIJ, 1352 MatMult_MPISBAIJ, 1353 /* 4*/ MatMultAdd_MPISBAIJ, 1354 MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */ 1355 MatMultAdd_MPISBAIJ, 1356 0, 1357 0, 1358 0, 1359 /*10*/ 0, 1360 0, 1361 0, 1362 MatRelax_MPISBAIJ, 1363 MatTranspose_MPISBAIJ, 1364 /*15*/ MatGetInfo_MPISBAIJ, 1365 MatEqual_MPISBAIJ, 1366 MatGetDiagonal_MPISBAIJ, 1367 MatDiagonalScale_MPISBAIJ, 1368 MatNorm_MPISBAIJ, 1369 /*20*/ MatAssemblyBegin_MPISBAIJ, 1370 MatAssemblyEnd_MPISBAIJ, 1371 0, 1372 MatSetOption_MPISBAIJ, 1373 MatZeroEntries_MPISBAIJ, 1374 /*25*/ 0, 1375 0, 1376 0, 1377 0, 1378 0, 1379 /*30*/ MatSetUpPreallocation_MPISBAIJ, 1380 0, 1381 0, 1382 0, 1383 0, 1384 /*35*/ MatDuplicate_MPISBAIJ, 1385 0, 1386 0, 1387 0, 1388 0, 1389 /*40*/ MatAXPY_MPISBAIJ, 1390 MatGetSubMatrices_MPISBAIJ, 1391 MatIncreaseOverlap_MPISBAIJ, 1392 MatGetValues_MPISBAIJ, 1393 0, 1394 /*45*/ MatPrintHelp_MPISBAIJ, 1395 MatScale_MPISBAIJ, 1396 0, 1397 0, 1398 0, 1399 /*50*/ 0, 1400 0, 1401 0, 1402 0, 1403 0, 1404 /*55*/ 0, 1405 0, 1406 MatSetUnfactored_MPISBAIJ, 1407 0, 1408 MatSetValuesBlocked_MPISBAIJ, 1409 /*60*/ 0, 1410 0, 1411 0, 1412 MatGetPetscMaps_Petsc, 1413 0, 1414 /*65*/ 0, 1415 0, 1416 0, 1417 0, 1418 0, 1419 /*70*/ MatGetRowMax_MPISBAIJ, 1420 0, 1421 0, 1422 0, 1423 0, 1424 /*75*/ 0, 1425 0, 1426 0, 1427 0, 1428 0, 1429 /*80*/ 0, 1430 0, 1431 0, 1432 0, 1433 MatLoad_MPISBAIJ, 1434 /*85*/ 0, 1435 0, 1436 0, 1437 0, 1438 0, 1439 /*90*/ 0, 1440 0, 1441 0, 1442 0, 1443 0, 1444 /*95*/ 0, 1445 0, 1446 0, 1447 0, 1448 0, 1449 /*100*/0, 1450 0, 1451 0, 1452 0, 1453 0, 1454 /*105*/0, 1455 MatRealPart_MPISBAIJ, 1456 MatImaginaryPart_MPISBAIJ 1457 }; 1458 1459 1460 EXTERN_C_BEGIN 1461 #undef __FUNCT__ 1462 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ" 1463 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 1464 { 1465 PetscFunctionBegin; 1466 *a = ((Mat_MPISBAIJ *)A->data)->A; 1467 *iscopy = PETSC_FALSE; 1468 PetscFunctionReturn(0); 1469 } 1470 EXTERN_C_END 1471 1472 EXTERN_C_BEGIN 1473 #undef __FUNCT__ 1474 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ" 1475 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz) 1476 { 1477 Mat_MPISBAIJ *b; 1478 PetscErrorCode ierr; 1479 PetscInt i,mbs,Mbs; 1480 1481 PetscFunctionBegin; 1482 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1483 1484 if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive"); 1485 if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3; 1486 if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1; 1487 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 1488 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 1489 if (d_nnz) { 1490 for (i=0; i<B->m/bs; i++) { 1491 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 1492 } 1493 } 1494 if (o_nnz) { 1495 for (i=0; i<B->m/bs; i++) { 1496 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 1497 } 1498 } 1499 B->preallocated = PETSC_TRUE; 1500 ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr); 1501 ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr); 1502 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1503 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->cmap);CHKERRQ(ierr); 1504 1505 b = (Mat_MPISBAIJ*)B->data; 1506 mbs = B->m/bs; 1507 Mbs = B->M/bs; 1508 if (mbs*bs != B->m) { 1509 SETERRQ2(PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->m,bs); 1510 } 1511 1512 B->bs = bs; 1513 b->bs2 = bs*bs; 1514 b->mbs = mbs; 1515 b->nbs = mbs; 1516 b->Mbs = Mbs; 1517 b->Nbs = Mbs; 1518 1519 ierr = MPI_Allgather(&b->mbs,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr); 1520 b->rowners[0] = 0; 1521 for (i=2; i<=b->size; i++) { 1522 b->rowners[i] += b->rowners[i-1]; 1523 } 1524 b->rstart = b->rowners[b->rank]; 1525 b->rend = b->rowners[b->rank+1]; 1526 b->cstart = b->rstart; 1527 b->cend = b->rend; 1528 for (i=0; i<=b->size; i++) { 1529 b->rowners_bs[i] = b->rowners[i]*bs; 1530 } 1531 b->rstart_bs = b-> rstart*bs; 1532 b->rend_bs = b->rend*bs; 1533 1534 b->cstart_bs = b->cstart*bs; 1535 b->cend_bs = b->cend*bs; 1536 1537 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 1538 ierr = MatSetSizes(b->A,B->m,B->m,B->m,B->m);CHKERRQ(ierr); 1539 ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr); 1540 ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 1541 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 1542 1543 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 1544 ierr = MatSetSizes(b->B,B->m,B->M,B->m,B->M);CHKERRQ(ierr); 1545 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 1546 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 1547 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 1548 1549 /* build cache for off array entries formed */ 1550 ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr); 1551 1552 PetscFunctionReturn(0); 1553 } 1554 EXTERN_C_END 1555 1556 /*MC 1557 MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices, 1558 based on block compressed sparse row format. Only the upper triangular portion of the matrix is stored. 1559 1560 Options Database Keys: 1561 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions() 1562 1563 Level: beginner 1564 1565 .seealso: MatCreateMPISBAIJ 1566 M*/ 1567 1568 EXTERN_C_BEGIN 1569 #undef __FUNCT__ 1570 #define __FUNCT__ "MatCreate_MPISBAIJ" 1571 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPISBAIJ(Mat B) 1572 { 1573 Mat_MPISBAIJ *b; 1574 PetscErrorCode ierr; 1575 PetscTruth flg; 1576 1577 PetscFunctionBegin; 1578 1579 ierr = PetscNew(Mat_MPISBAIJ,&b);CHKERRQ(ierr); 1580 B->data = (void*)b; 1581 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1582 1583 B->ops->destroy = MatDestroy_MPISBAIJ; 1584 B->ops->view = MatView_MPISBAIJ; 1585 B->mapping = 0; 1586 B->factor = 0; 1587 B->assembled = PETSC_FALSE; 1588 1589 B->insertmode = NOT_SET_VALUES; 1590 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 1591 ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr); 1592 1593 /* build local table of row and column ownerships */ 1594 ierr = PetscMalloc(3*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr); 1595 b->cowners = b->rowners + b->size + 2; 1596 b->rowners_bs = b->cowners + b->size + 2; 1597 ierr = PetscLogObjectMemory(B,3*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ));CHKERRQ(ierr); 1598 1599 /* build cache for off array entries formed */ 1600 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 1601 b->donotstash = PETSC_FALSE; 1602 b->colmap = PETSC_NULL; 1603 b->garray = PETSC_NULL; 1604 b->roworiented = PETSC_TRUE; 1605 1606 #if defined(PETSC_USE_MAT_SINGLE) 1607 /* stuff for MatSetValues_XXX in single precision */ 1608 b->setvalueslen = 0; 1609 b->setvaluescopy = PETSC_NULL; 1610 #endif 1611 1612 /* stuff used in block assembly */ 1613 b->barray = 0; 1614 1615 /* stuff used for matrix vector multiply */ 1616 b->lvec = 0; 1617 b->Mvctx = 0; 1618 b->slvec0 = 0; 1619 b->slvec0b = 0; 1620 b->slvec1 = 0; 1621 b->slvec1a = 0; 1622 b->slvec1b = 0; 1623 b->sMvctx = 0; 1624 1625 /* stuff for MatGetRow() */ 1626 b->rowindices = 0; 1627 b->rowvalues = 0; 1628 b->getrowactive = PETSC_FALSE; 1629 1630 /* hash table stuff */ 1631 b->ht = 0; 1632 b->hd = 0; 1633 b->ht_size = 0; 1634 b->ht_flag = PETSC_FALSE; 1635 b->ht_fact = 0; 1636 b->ht_total_ct = 0; 1637 b->ht_insert_ct = 0; 1638 1639 ierr = PetscOptionsHasName(B->prefix,"-mat_use_hash_table",&flg);CHKERRQ(ierr); 1640 if (flg) { 1641 PetscReal fact = 1.39; 1642 ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr); 1643 ierr = PetscOptionsGetReal(B->prefix,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr); 1644 if (fact <= 1.0) fact = 1.39; 1645 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 1646 ierr = PetscLogInfo((0,"MatCreateMPISBAIJ:Hash table Factor used %5.2f\n",fact));CHKERRQ(ierr); 1647 } 1648 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1649 "MatStoreValues_MPISBAIJ", 1650 MatStoreValues_MPISBAIJ);CHKERRQ(ierr); 1651 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1652 "MatRetrieveValues_MPISBAIJ", 1653 MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr); 1654 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 1655 "MatGetDiagonalBlock_MPISBAIJ", 1656 MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr); 1657 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C", 1658 "MatMPISBAIJSetPreallocation_MPISBAIJ", 1659 MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr); 1660 B->symmetric = PETSC_TRUE; 1661 B->structurally_symmetric = PETSC_TRUE; 1662 B->symmetric_set = PETSC_TRUE; 1663 B->structurally_symmetric_set = PETSC_TRUE; 1664 PetscFunctionReturn(0); 1665 } 1666 EXTERN_C_END 1667 1668 /*MC 1669 MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices. 1670 1671 This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator, 1672 and MATMPISBAIJ otherwise. 1673 1674 Options Database Keys: 1675 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions() 1676 1677 Level: beginner 1678 1679 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ 1680 M*/ 1681 1682 EXTERN_C_BEGIN 1683 #undef __FUNCT__ 1684 #define __FUNCT__ "MatCreate_SBAIJ" 1685 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SBAIJ(Mat A) 1686 { 1687 PetscErrorCode ierr; 1688 PetscMPIInt size; 1689 1690 PetscFunctionBegin; 1691 ierr = PetscObjectChangeTypeName((PetscObject)A,MATSBAIJ);CHKERRQ(ierr); 1692 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 1693 if (size == 1) { 1694 ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr); 1695 } else { 1696 ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 1697 } 1698 PetscFunctionReturn(0); 1699 } 1700 EXTERN_C_END 1701 1702 #undef __FUNCT__ 1703 #define __FUNCT__ "MatMPISBAIJSetPreallocation" 1704 /*@C 1705 MatMPISBAIJSetPreallocation - For good matrix assembly performance 1706 the user should preallocate the matrix storage by setting the parameters 1707 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1708 performance can be increased by more than a factor of 50. 1709 1710 Collective on Mat 1711 1712 Input Parameters: 1713 + A - the matrix 1714 . bs - size of blockk 1715 . d_nz - number of block nonzeros per block row in diagonal portion of local 1716 submatrix (same for all local rows) 1717 . d_nnz - array containing the number of block nonzeros in the various block rows 1718 in the upper triangular and diagonal part of the in diagonal portion of the local 1719 (possibly different for each block row) or PETSC_NULL. You must leave room 1720 for the diagonal entry even if it is zero. 1721 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1722 submatrix (same for all local rows). 1723 - o_nnz - array containing the number of nonzeros in the various block rows of the 1724 off-diagonal portion of the local submatrix (possibly different for 1725 each block row) or PETSC_NULL. 1726 1727 1728 Options Database Keys: 1729 . -mat_no_unroll - uses code that does not unroll the loops in the 1730 block calculations (much slower) 1731 . -mat_block_size - size of the blocks to use 1732 1733 Notes: 1734 1735 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1736 than it must be used on all processors that share the object for that argument. 1737 1738 If the *_nnz parameter is given then the *_nz parameter is ignored 1739 1740 Storage Information: 1741 For a square global matrix we define each processor's diagonal portion 1742 to be its local rows and the corresponding columns (a square submatrix); 1743 each processor's off-diagonal portion encompasses the remainder of the 1744 local matrix (a rectangular submatrix). 1745 1746 The user can specify preallocated storage for the diagonal part of 1747 the local submatrix with either d_nz or d_nnz (not both). Set 1748 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1749 memory allocation. Likewise, specify preallocated storage for the 1750 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1751 1752 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1753 the figure below we depict these three local rows and all columns (0-11). 1754 1755 .vb 1756 0 1 2 3 4 5 6 7 8 9 10 11 1757 ------------------- 1758 row 3 | o o o d d d o o o o o o 1759 row 4 | o o o d d d o o o o o o 1760 row 5 | o o o d d d o o o o o o 1761 ------------------- 1762 .ve 1763 1764 Thus, any entries in the d locations are stored in the d (diagonal) 1765 submatrix, and any entries in the o locations are stored in the 1766 o (off-diagonal) submatrix. Note that the d matrix is stored in 1767 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 1768 1769 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 1770 plus the diagonal part of the d matrix, 1771 and o_nz should indicate the number of block nonzeros per row in the o matrix. 1772 In general, for PDE problems in which most nonzeros are near the diagonal, 1773 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1774 or you will get TERRIBLE performance; see the users' manual chapter on 1775 matrices. 1776 1777 Level: intermediate 1778 1779 .keywords: matrix, block, aij, compressed row, sparse, parallel 1780 1781 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1782 @*/ 1783 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 1784 { 1785 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 1786 1787 PetscFunctionBegin; 1788 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 1789 if (f) { 1790 ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 1791 } 1792 PetscFunctionReturn(0); 1793 } 1794 1795 #undef __FUNCT__ 1796 #define __FUNCT__ "MatCreateMPISBAIJ" 1797 /*@C 1798 MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format 1799 (block compressed row). For good matrix assembly performance 1800 the user should preallocate the matrix storage by setting the parameters 1801 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1802 performance can be increased by more than a factor of 50. 1803 1804 Collective on MPI_Comm 1805 1806 Input Parameters: 1807 + comm - MPI communicator 1808 . bs - size of blockk 1809 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1810 This value should be the same as the local size used in creating the 1811 y vector for the matrix-vector product y = Ax. 1812 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1813 This value should be the same as the local size used in creating the 1814 x vector for the matrix-vector product y = Ax. 1815 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 1816 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 1817 . d_nz - number of block nonzeros per block row in diagonal portion of local 1818 submatrix (same for all local rows) 1819 . d_nnz - array containing the number of block nonzeros in the various block rows 1820 in the upper triangular portion of the in diagonal portion of the local 1821 (possibly different for each block block row) or PETSC_NULL. 1822 You must leave room for the diagonal entry even if it is zero. 1823 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1824 submatrix (same for all local rows). 1825 - o_nnz - array containing the number of nonzeros in the various block rows of the 1826 off-diagonal portion of the local submatrix (possibly different for 1827 each block row) or PETSC_NULL. 1828 1829 Output Parameter: 1830 . A - the matrix 1831 1832 Options Database Keys: 1833 . -mat_no_unroll - uses code that does not unroll the loops in the 1834 block calculations (much slower) 1835 . -mat_block_size - size of the blocks to use 1836 . -mat_mpi - use the parallel matrix data structures even on one processor 1837 (defaults to using SeqBAIJ format on one processor) 1838 1839 Notes: 1840 The number of rows and columns must be divisible by blocksize. 1841 1842 The user MUST specify either the local or global matrix dimensions 1843 (possibly both). 1844 1845 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1846 than it must be used on all processors that share the object for that argument. 1847 1848 If the *_nnz parameter is given then the *_nz parameter is ignored 1849 1850 Storage Information: 1851 For a square global matrix we define each processor's diagonal portion 1852 to be its local rows and the corresponding columns (a square submatrix); 1853 each processor's off-diagonal portion encompasses the remainder of the 1854 local matrix (a rectangular submatrix). 1855 1856 The user can specify preallocated storage for the diagonal part of 1857 the local submatrix with either d_nz or d_nnz (not both). Set 1858 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1859 memory allocation. Likewise, specify preallocated storage for the 1860 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1861 1862 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1863 the figure below we depict these three local rows and all columns (0-11). 1864 1865 .vb 1866 0 1 2 3 4 5 6 7 8 9 10 11 1867 ------------------- 1868 row 3 | o o o d d d o o o o o o 1869 row 4 | o o o d d d o o o o o o 1870 row 5 | o o o d d d o o o o o o 1871 ------------------- 1872 .ve 1873 1874 Thus, any entries in the d locations are stored in the d (diagonal) 1875 submatrix, and any entries in the o locations are stored in the 1876 o (off-diagonal) submatrix. Note that the d matrix is stored in 1877 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 1878 1879 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 1880 plus the diagonal part of the d matrix, 1881 and o_nz should indicate the number of block nonzeros per row in the o matrix. 1882 In general, for PDE problems in which most nonzeros are near the diagonal, 1883 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1884 or you will get TERRIBLE performance; see the users' manual chapter on 1885 matrices. 1886 1887 Level: intermediate 1888 1889 .keywords: matrix, block, aij, compressed row, sparse, parallel 1890 1891 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1892 @*/ 1893 1894 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPISBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 1895 { 1896 PetscErrorCode ierr; 1897 PetscMPIInt size; 1898 1899 PetscFunctionBegin; 1900 ierr = MatCreate(comm,A);CHKERRQ(ierr); 1901 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 1902 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1903 if (size > 1) { 1904 ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr); 1905 ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 1906 } else { 1907 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 1908 ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 1909 } 1910 PetscFunctionReturn(0); 1911 } 1912 1913 1914 #undef __FUNCT__ 1915 #define __FUNCT__ "MatDuplicate_MPISBAIJ" 1916 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1917 { 1918 Mat mat; 1919 Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data; 1920 PetscErrorCode ierr; 1921 PetscInt len=0,nt,bs=matin->bs,mbs=oldmat->mbs; 1922 PetscScalar *array; 1923 1924 PetscFunctionBegin; 1925 *newmat = 0; 1926 ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr); 1927 ierr = MatSetSizes(mat,matin->m,matin->n,matin->M,matin->N);CHKERRQ(ierr); 1928 ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 1929 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1930 1931 mat->factor = matin->factor; 1932 mat->preallocated = PETSC_TRUE; 1933 mat->assembled = PETSC_TRUE; 1934 mat->insertmode = NOT_SET_VALUES; 1935 1936 a = (Mat_MPISBAIJ*)mat->data; 1937 mat->bs = matin->bs; 1938 a->bs2 = oldmat->bs2; 1939 a->mbs = oldmat->mbs; 1940 a->nbs = oldmat->nbs; 1941 a->Mbs = oldmat->Mbs; 1942 a->Nbs = oldmat->Nbs; 1943 1944 a->rstart = oldmat->rstart; 1945 a->rend = oldmat->rend; 1946 a->cstart = oldmat->cstart; 1947 a->cend = oldmat->cend; 1948 a->size = oldmat->size; 1949 a->rank = oldmat->rank; 1950 a->donotstash = oldmat->donotstash; 1951 a->roworiented = oldmat->roworiented; 1952 a->rowindices = 0; 1953 a->rowvalues = 0; 1954 a->getrowactive = PETSC_FALSE; 1955 a->barray = 0; 1956 a->rstart_bs = oldmat->rstart_bs; 1957 a->rend_bs = oldmat->rend_bs; 1958 a->cstart_bs = oldmat->cstart_bs; 1959 a->cend_bs = oldmat->cend_bs; 1960 1961 /* hash table stuff */ 1962 a->ht = 0; 1963 a->hd = 0; 1964 a->ht_size = 0; 1965 a->ht_flag = oldmat->ht_flag; 1966 a->ht_fact = oldmat->ht_fact; 1967 a->ht_total_ct = 0; 1968 a->ht_insert_ct = 0; 1969 1970 ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr); 1971 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1972 ierr = MatStashCreate_Private(matin->comm,matin->bs,&mat->bstash);CHKERRQ(ierr); 1973 if (oldmat->colmap) { 1974 #if defined (PETSC_USE_CTABLE) 1975 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1976 #else 1977 ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 1978 ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 1979 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 1980 #endif 1981 } else a->colmap = 0; 1982 1983 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 1984 ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 1985 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 1986 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 1987 } else a->garray = 0; 1988 1989 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 1990 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 1991 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 1992 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 1993 1994 ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr); 1995 ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr); 1996 ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr); 1997 ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr); 1998 1999 ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr); 2000 ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr); 2001 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr); 2002 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr); 2003 ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr); 2004 ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr); 2005 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr); 2006 ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr); 2007 ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr); 2008 ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr); 2009 ierr = PetscLogObjectParent(mat,a->slvec0b);CHKERRQ(ierr); 2010 ierr = PetscLogObjectParent(mat,a->slvec1a);CHKERRQ(ierr); 2011 ierr = PetscLogObjectParent(mat,a->slvec1b);CHKERRQ(ierr); 2012 2013 /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */ 2014 ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr); 2015 a->sMvctx = oldmat->sMvctx; 2016 ierr = PetscLogObjectParent(mat,a->sMvctx);CHKERRQ(ierr); 2017 2018 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2019 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2020 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2021 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2022 ierr = PetscFListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr); 2023 *newmat = mat; 2024 PetscFunctionReturn(0); 2025 } 2026 2027 #include "petscsys.h" 2028 2029 #undef __FUNCT__ 2030 #define __FUNCT__ "MatLoad_MPISBAIJ" 2031 PetscErrorCode MatLoad_MPISBAIJ(PetscViewer viewer, MatType type,Mat *newmat) 2032 { 2033 Mat A; 2034 PetscErrorCode ierr; 2035 PetscInt i,nz,j,rstart,rend; 2036 PetscScalar *vals,*buf; 2037 MPI_Comm comm = ((PetscObject)viewer)->comm; 2038 MPI_Status status; 2039 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,*locrowlens; 2040 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2041 PetscInt *procsnz = 0,jj,*mycols,*ibuf; 2042 PetscInt bs=1,Mbs,mbs,extra_rows; 2043 PetscInt *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2044 PetscInt dcount,kmax,k,nzcount,tmp; 2045 int fd; 2046 2047 PetscFunctionBegin; 2048 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2049 2050 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2051 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2052 if (!rank) { 2053 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2054 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2055 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2056 if (header[3] < 0) { 2057 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ"); 2058 } 2059 } 2060 2061 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2062 M = header[1]; N = header[2]; 2063 2064 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2065 2066 /* 2067 This code adds extra rows to make sure the number of rows is 2068 divisible by the blocksize 2069 */ 2070 Mbs = M/bs; 2071 extra_rows = bs - M + bs*(Mbs); 2072 if (extra_rows == bs) extra_rows = 0; 2073 else Mbs++; 2074 if (extra_rows &&!rank) { 2075 ierr = PetscLogInfo((0,"MatLoad_MPISBAIJ:Padding loaded matrix to match blocksize\n"));CHKERRQ(ierr); 2076 } 2077 2078 /* determine ownership of all rows */ 2079 mbs = Mbs/size + ((Mbs % size) > rank); 2080 m = mbs*bs; 2081 ierr = PetscMalloc(2*(size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr); 2082 browners = rowners + size + 1; 2083 ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2084 rowners[0] = 0; 2085 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2086 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2087 rstart = rowners[rank]; 2088 rend = rowners[rank+1]; 2089 2090 /* distribute row lengths to all processors */ 2091 ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscMPIInt),&locrowlens);CHKERRQ(ierr); 2092 if (!rank) { 2093 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2094 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2095 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2096 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr); 2097 for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2098 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2099 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2100 } else { 2101 ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2102 } 2103 2104 if (!rank) { /* procs[0] */ 2105 /* calculate the number of nonzeros on each processor */ 2106 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2107 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2108 for (i=0; i<size; i++) { 2109 for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 2110 procsnz[i] += rowlengths[j]; 2111 } 2112 } 2113 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2114 2115 /* determine max buffer needed and allocate it */ 2116 maxnz = 0; 2117 for (i=0; i<size; i++) { 2118 maxnz = PetscMax(maxnz,procsnz[i]); 2119 } 2120 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2121 2122 /* read in my part of the matrix column indices */ 2123 nz = procsnz[0]; 2124 ierr = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 2125 mycols = ibuf; 2126 if (size == 1) nz -= extra_rows; 2127 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2128 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2129 2130 /* read in every ones (except the last) and ship off */ 2131 for (i=1; i<size-1; i++) { 2132 nz = procsnz[i]; 2133 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2134 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2135 } 2136 /* read in the stuff for the last proc */ 2137 if (size != 1) { 2138 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2139 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2140 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2141 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 2142 } 2143 ierr = PetscFree(cols);CHKERRQ(ierr); 2144 } else { /* procs[i], i>0 */ 2145 /* determine buffer space needed for message */ 2146 nz = 0; 2147 for (i=0; i<m; i++) { 2148 nz += locrowlens[i]; 2149 } 2150 ierr = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 2151 mycols = ibuf; 2152 /* receive message of column indices*/ 2153 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2154 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2155 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2156 } 2157 2158 /* loop over local rows, determining number of off diagonal entries */ 2159 ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 2160 odlens = dlens + (rend-rstart); 2161 ierr = PetscMalloc(3*Mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr); 2162 ierr = PetscMemzero(mask,3*Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2163 masked1 = mask + Mbs; 2164 masked2 = masked1 + Mbs; 2165 rowcount = 0; nzcount = 0; 2166 for (i=0; i<mbs; i++) { 2167 dcount = 0; 2168 odcount = 0; 2169 for (j=0; j<bs; j++) { 2170 kmax = locrowlens[rowcount]; 2171 for (k=0; k<kmax; k++) { 2172 tmp = mycols[nzcount++]/bs; /* block col. index */ 2173 if (!mask[tmp]) { 2174 mask[tmp] = 1; 2175 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */ 2176 else masked1[dcount++] = tmp; /* entry in diag portion */ 2177 } 2178 } 2179 rowcount++; 2180 } 2181 2182 dlens[i] = dcount; /* d_nzz[i] */ 2183 odlens[i] = odcount; /* o_nzz[i] */ 2184 2185 /* zero out the mask elements we set */ 2186 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2187 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2188 } 2189 2190 /* create our matrix */ 2191 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 2192 ierr = MatSetSizes(A,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2193 ierr = MatSetType(A,type);CHKERRQ(ierr); 2194 ierr = MatMPISBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr); 2195 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2196 2197 if (!rank) { 2198 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2199 /* read in my part of the matrix numerical values */ 2200 nz = procsnz[0]; 2201 vals = buf; 2202 mycols = ibuf; 2203 if (size == 1) nz -= extra_rows; 2204 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2205 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2206 2207 /* insert into matrix */ 2208 jj = rstart*bs; 2209 for (i=0; i<m; i++) { 2210 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2211 mycols += locrowlens[i]; 2212 vals += locrowlens[i]; 2213 jj++; 2214 } 2215 2216 /* read in other processors (except the last one) and ship out */ 2217 for (i=1; i<size-1; i++) { 2218 nz = procsnz[i]; 2219 vals = buf; 2220 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2221 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2222 } 2223 /* the last proc */ 2224 if (size != 1){ 2225 nz = procsnz[i] - extra_rows; 2226 vals = buf; 2227 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2228 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2229 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 2230 } 2231 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2232 2233 } else { 2234 /* receive numeric values */ 2235 ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2236 2237 /* receive message of values*/ 2238 vals = buf; 2239 mycols = ibuf; 2240 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2241 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2242 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2243 2244 /* insert into matrix */ 2245 jj = rstart*bs; 2246 for (i=0; i<m; i++) { 2247 ierr = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2248 mycols += locrowlens[i]; 2249 vals += locrowlens[i]; 2250 jj++; 2251 } 2252 } 2253 2254 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2255 ierr = PetscFree(buf);CHKERRQ(ierr); 2256 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2257 ierr = PetscFree(rowners);CHKERRQ(ierr); 2258 ierr = PetscFree(dlens);CHKERRQ(ierr); 2259 ierr = PetscFree(mask);CHKERRQ(ierr); 2260 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2261 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2262 *newmat = A; 2263 PetscFunctionReturn(0); 2264 } 2265 2266 #undef __FUNCT__ 2267 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor" 2268 /*XXXXX@ 2269 MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2270 2271 Input Parameters: 2272 . mat - the matrix 2273 . fact - factor 2274 2275 Collective on Mat 2276 2277 Level: advanced 2278 2279 Notes: 2280 This can also be set by the command line option: -mat_use_hash_table fact 2281 2282 .keywords: matrix, hashtable, factor, HT 2283 2284 .seealso: MatSetOption() 2285 @XXXXX*/ 2286 2287 2288 #undef __FUNCT__ 2289 #define __FUNCT__ "MatGetRowMax_MPISBAIJ" 2290 PetscErrorCode MatGetRowMax_MPISBAIJ(Mat A,Vec v) 2291 { 2292 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 2293 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(a->B)->data; 2294 PetscReal atmp; 2295 PetscReal *work,*svalues,*rvalues; 2296 PetscErrorCode ierr; 2297 PetscInt i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol; 2298 PetscMPIInt rank,size; 2299 PetscInt *rowners_bs,dest,count,source; 2300 PetscScalar *va; 2301 MatScalar *ba; 2302 MPI_Status stat; 2303 2304 PetscFunctionBegin; 2305 ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr); 2306 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2307 2308 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 2309 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 2310 2311 bs = A->bs; 2312 mbs = a->mbs; 2313 Mbs = a->Mbs; 2314 ba = b->a; 2315 bi = b->i; 2316 bj = b->j; 2317 2318 /* find ownerships */ 2319 rowners_bs = a->rowners_bs; 2320 2321 /* each proc creates an array to be distributed */ 2322 ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr); 2323 ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr); 2324 2325 /* row_max for B */ 2326 if (rank != size-1){ 2327 for (i=0; i<mbs; i++) { 2328 ncols = bi[1] - bi[0]; bi++; 2329 brow = bs*i; 2330 for (j=0; j<ncols; j++){ 2331 bcol = bs*(*bj); 2332 for (kcol=0; kcol<bs; kcol++){ 2333 col = bcol + kcol; /* local col index */ 2334 col += rowners_bs[rank+1]; /* global col index */ 2335 for (krow=0; krow<bs; krow++){ 2336 atmp = PetscAbsScalar(*ba); ba++; 2337 row = brow + krow; /* local row index */ 2338 if (PetscRealPart(va[row]) < atmp) va[row] = atmp; 2339 if (work[col] < atmp) work[col] = atmp; 2340 } 2341 } 2342 bj++; 2343 } 2344 } 2345 2346 /* send values to its owners */ 2347 for (dest=rank+1; dest<size; dest++){ 2348 svalues = work + rowners_bs[dest]; 2349 count = rowners_bs[dest+1]-rowners_bs[dest]; 2350 ierr = MPI_Send(svalues,count,MPIU_REAL,dest,rank,A->comm);CHKERRQ(ierr); 2351 } 2352 } 2353 2354 /* receive values */ 2355 if (rank){ 2356 rvalues = work; 2357 count = rowners_bs[rank+1]-rowners_bs[rank]; 2358 for (source=0; source<rank; source++){ 2359 ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,A->comm,&stat);CHKERRQ(ierr); 2360 /* process values */ 2361 for (i=0; i<count; i++){ 2362 if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i]; 2363 } 2364 } 2365 } 2366 2367 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2368 ierr = PetscFree(work);CHKERRQ(ierr); 2369 PetscFunctionReturn(0); 2370 } 2371 2372 #undef __FUNCT__ 2373 #define __FUNCT__ "MatRelax_MPISBAIJ" 2374 PetscErrorCode MatRelax_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2375 { 2376 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2377 PetscErrorCode ierr; 2378 PetscInt mbs=mat->mbs,bs=matin->bs; 2379 PetscScalar *x,*b,*ptr,zero=0.0; 2380 Vec bb1; 2381 2382 PetscFunctionBegin; 2383 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 2384 if (bs > 1) 2385 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2386 2387 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2388 if ( flag & SOR_ZERO_INITIAL_GUESS ) { 2389 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2390 its--; 2391 } 2392 2393 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2394 while (its--){ 2395 2396 /* lower triangular part: slvec0b = - B^T*xx */ 2397 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2398 2399 /* copy xx into slvec0a */ 2400 ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2401 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2402 ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2403 ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2404 2405 ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr); 2406 2407 /* copy bb into slvec1a */ 2408 ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2409 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 2410 ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2411 ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2412 2413 /* set slvec1b = 0 */ 2414 ierr = VecSet(mat->slvec1b,zero);CHKERRQ(ierr); 2415 2416 ierr = VecScatterBegin(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr); 2417 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2418 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 2419 ierr = VecScatterEnd(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr); 2420 2421 /* upper triangular part: bb1 = bb1 - B*x */ 2422 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr); 2423 2424 /* local diagonal sweep */ 2425 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2426 } 2427 ierr = VecDestroy(bb1);CHKERRQ(ierr); 2428 } else { 2429 SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2430 } 2431 PetscFunctionReturn(0); 2432 } 2433 2434 #undef __FUNCT__ 2435 #define __FUNCT__ "MatRelax_MPISBAIJ_2comm" 2436 PetscErrorCode MatRelax_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2437 { 2438 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2439 PetscErrorCode ierr; 2440 Vec lvec1,bb1; 2441 2442 PetscFunctionBegin; 2443 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 2444 if (matin->bs > 1) 2445 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2446 2447 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2448 if ( flag & SOR_ZERO_INITIAL_GUESS ) { 2449 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2450 its--; 2451 } 2452 2453 ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr); 2454 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2455 while (its--){ 2456 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 2457 2458 /* lower diagonal part: bb1 = bb - B^T*xx */ 2459 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr); 2460 ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr); 2461 2462 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 2463 ierr = VecCopy(bb,bb1);CHKERRQ(ierr); 2464 ierr = VecScatterBegin(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr); 2465 2466 /* upper diagonal part: bb1 = bb1 - B*x */ 2467 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2468 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 2469 2470 ierr = VecScatterEnd(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr); 2471 2472 /* diagonal sweep */ 2473 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2474 } 2475 ierr = VecDestroy(lvec1);CHKERRQ(ierr); 2476 ierr = VecDestroy(bb1);CHKERRQ(ierr); 2477 } else { 2478 SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2479 } 2480 PetscFunctionReturn(0); 2481 } 2482 2483