1 #define PETSCMAT_DLL 2 3 #include "src/mat/impls/baij/mpi/mpibaij.h" /*I "petscmat.h" I*/ 4 #include "mpisbaij.h" 5 #include "src/mat/impls/sbaij/seq/sbaij.h" 6 7 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat); 8 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat); 9 EXTERN PetscErrorCode DisAssemble_MPISBAIJ(Mat); 10 EXTERN PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt); 11 EXTERN PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []); 12 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []); 13 EXTERN PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode); 14 EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 15 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 16 EXTERN PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**); 17 EXTERN PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**); 18 EXTERN PetscErrorCode MatPrintHelp_SeqSBAIJ(Mat); 19 EXTERN PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*); 20 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *); 21 EXTERN PetscErrorCode MatGetRowMax_MPISBAIJ(Mat,Vec); 22 EXTERN PetscErrorCode MatRelax_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec); 23 24 /* UGLY, ugly, ugly 25 When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does 26 not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and 27 inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ() 28 converts the entries into single precision and then calls ..._MatScalar() to put them 29 into the single precision data structures. 30 */ 31 #if defined(PETSC_USE_MAT_SINGLE) 32 EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode); 33 EXTERN PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode); 34 EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode); 35 EXTERN PetscErrorCode MatSetValues_MPISBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode); 36 EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode); 37 #else 38 #define MatSetValuesBlocked_SeqSBAIJ_MatScalar MatSetValuesBlocked_SeqSBAIJ 39 #define MatSetValues_MPISBAIJ_MatScalar MatSetValues_MPISBAIJ 40 #define MatSetValuesBlocked_MPISBAIJ_MatScalar MatSetValuesBlocked_MPISBAIJ 41 #define MatSetValues_MPISBAIJ_HT_MatScalar MatSetValues_MPISBAIJ_HT 42 #define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar MatSetValuesBlocked_MPISBAIJ_HT 43 #endif 44 45 EXTERN_C_BEGIN 46 #undef __FUNCT__ 47 #define __FUNCT__ "MatStoreValues_MPISBAIJ" 48 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPISBAIJ(Mat mat) 49 { 50 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 51 PetscErrorCode ierr; 52 53 PetscFunctionBegin; 54 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 55 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 56 PetscFunctionReturn(0); 57 } 58 EXTERN_C_END 59 60 EXTERN_C_BEGIN 61 #undef __FUNCT__ 62 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ" 63 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPISBAIJ(Mat mat) 64 { 65 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 66 PetscErrorCode ierr; 67 68 PetscFunctionBegin; 69 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 70 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 71 PetscFunctionReturn(0); 72 } 73 EXTERN_C_END 74 75 76 #define CHUNKSIZE 10 77 78 #define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \ 79 { \ 80 \ 81 brow = row/bs; \ 82 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 83 rmax = aimax[brow]; nrow = ailen[brow]; \ 84 bcol = col/bs; \ 85 ridx = row % bs; cidx = col % bs; \ 86 low = 0; high = nrow; \ 87 while (high-low > 3) { \ 88 t = (low+high)/2; \ 89 if (rp[t] > bcol) high = t; \ 90 else low = t; \ 91 } \ 92 for (_i=low; _i<high; _i++) { \ 93 if (rp[_i] > bcol) break; \ 94 if (rp[_i] == bcol) { \ 95 bap = ap + bs2*_i + bs*cidx + ridx; \ 96 if (addv == ADD_VALUES) *bap += value; \ 97 else *bap = value; \ 98 goto a_noinsert; \ 99 } \ 100 } \ 101 if (a->nonew == 1) goto a_noinsert; \ 102 if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 103 MatSeqXAIJReallocateAIJ(a,bs2,nrow,brow,bcol,rmax,aa,ai,aj,a->mbs,rp,ap,aimax,a->nonew); \ 104 N = nrow++ - 1; \ 105 /* shift up all the later entries in this row */ \ 106 for (ii=N; ii>=_i; ii--) { \ 107 rp[ii+1] = rp[ii]; \ 108 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 109 } \ 110 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 111 rp[_i] = bcol; \ 112 ap[bs2*_i + bs*cidx + ridx] = value; \ 113 a_noinsert:; \ 114 ailen[brow] = nrow; \ 115 } 116 #ifndef MatSetValues_SeqBAIJ_B_Private 117 #define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \ 118 { \ 119 brow = row/bs; \ 120 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 121 rmax = bimax[brow]; nrow = bilen[brow]; \ 122 bcol = col/bs; \ 123 ridx = row % bs; cidx = col % bs; \ 124 low = 0; high = nrow; \ 125 while (high-low > 3) { \ 126 t = (low+high)/2; \ 127 if (rp[t] > bcol) high = t; \ 128 else low = t; \ 129 } \ 130 for (_i=low; _i<high; _i++) { \ 131 if (rp[_i] > bcol) break; \ 132 if (rp[_i] == bcol) { \ 133 bap = ap + bs2*_i + bs*cidx + ridx; \ 134 if (addv == ADD_VALUES) *bap += value; \ 135 else *bap = value; \ 136 goto b_noinsert; \ 137 } \ 138 } \ 139 if (b->nonew == 1) goto b_noinsert; \ 140 if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 141 MatSeqXAIJReallocateAIJ(b,bs2,nrow,brow,bcol,rmax,ba,bi,bj,b->mbs,rp,ap,bimax,b->nonew); \ 142 N = nrow++ - 1; \ 143 /* shift up all the later entries in this row */ \ 144 for (ii=N; ii>=_i; ii--) { \ 145 rp[ii+1] = rp[ii]; \ 146 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 147 } \ 148 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 149 rp[_i] = bcol; \ 150 ap[bs2*_i + bs*cidx + ridx] = value; \ 151 b_noinsert:; \ 152 bilen[brow] = nrow; \ 153 } 154 #endif 155 156 #if defined(PETSC_USE_MAT_SINGLE) 157 #undef __FUNCT__ 158 #define __FUNCT__ "MatSetValues_MPISBAIJ" 159 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 160 { 161 Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data; 162 PetscErrorCode ierr; 163 PetscInt i,N = m*n; 164 MatScalar *vsingle; 165 166 PetscFunctionBegin; 167 if (N > b->setvalueslen) { 168 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 169 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 170 b->setvalueslen = N; 171 } 172 vsingle = b->setvaluescopy; 173 174 for (i=0; i<N; i++) { 175 vsingle[i] = v[i]; 176 } 177 ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 178 PetscFunctionReturn(0); 179 } 180 181 #undef __FUNCT__ 182 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ" 183 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 184 { 185 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 186 PetscErrorCode ierr; 187 PetscInt i,N = m*n*b->bs2; 188 MatScalar *vsingle; 189 190 PetscFunctionBegin; 191 if (N > b->setvalueslen) { 192 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 193 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 194 b->setvalueslen = N; 195 } 196 vsingle = b->setvaluescopy; 197 for (i=0; i<N; i++) { 198 vsingle[i] = v[i]; 199 } 200 ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 201 PetscFunctionReturn(0); 202 } 203 #endif 204 205 /* Only add/insert a(i,j) with i<=j (blocks). 206 Any a(i,j) with i>j input by user is ingored. 207 */ 208 #undef __FUNCT__ 209 #define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar" 210 PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 211 { 212 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 213 MatScalar value; 214 PetscTruth roworiented = baij->roworiented; 215 PetscErrorCode ierr; 216 PetscInt i,j,row,col; 217 PetscInt rstart_orig=baij->rstart_bs; 218 PetscInt rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs; 219 PetscInt cend_orig=baij->cend_bs,bs=mat->bs; 220 221 /* Some Variables required in the macro */ 222 Mat A = baij->A; 223 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data; 224 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 225 MatScalar *aa=a->a; 226 227 Mat B = baij->B; 228 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 229 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 230 MatScalar *ba=b->a; 231 232 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 233 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 234 MatScalar *ap,*bap; 235 236 /* for stash */ 237 PetscInt n_loc, *in_loc=0; 238 MatScalar *v_loc=0; 239 240 PetscFunctionBegin; 241 242 if(!baij->donotstash){ 243 ierr = PetscMalloc(n*sizeof(PetscInt),&in_loc);CHKERRQ(ierr); 244 ierr = PetscMalloc(n*sizeof(MatScalar),&v_loc);CHKERRQ(ierr); 245 } 246 247 for (i=0; i<m; i++) { 248 if (im[i] < 0) continue; 249 #if defined(PETSC_USE_DEBUG) 250 if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1); 251 #endif 252 if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */ 253 row = im[i] - rstart_orig; /* local row index */ 254 for (j=0; j<n; j++) { 255 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 256 if (in[j] >= cstart_orig && in[j] < cend_orig){ /* diag entry (A) */ 257 col = in[j] - cstart_orig; /* local col index */ 258 brow = row/bs; bcol = col/bs; 259 if (brow > bcol) continue; /* ignore lower triangular blocks of A */ 260 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 261 MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv); 262 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 263 } else if (in[j] < 0) continue; 264 #if defined(PETSC_USE_DEBUG) 265 else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);} 266 #endif 267 else { /* off-diag entry (B) */ 268 if (mat->was_assembled) { 269 if (!baij->colmap) { 270 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 271 } 272 #if defined (PETSC_USE_CTABLE) 273 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 274 col = col - 1; 275 #else 276 col = baij->colmap[in[j]/bs] - 1; 277 #endif 278 if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) { 279 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 280 col = in[j]; 281 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 282 B = baij->B; 283 b = (Mat_SeqBAIJ*)(B)->data; 284 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 285 ba=b->a; 286 } else col += in[j]%bs; 287 } else col = in[j]; 288 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 289 MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv); 290 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 291 } 292 } 293 } else { /* off processor entry */ 294 if (!baij->donotstash) { 295 n_loc = 0; 296 for (j=0; j<n; j++){ 297 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 298 in_loc[n_loc] = in[j]; 299 if (roworiented) { 300 v_loc[n_loc] = v[i*n+j]; 301 } else { 302 v_loc[n_loc] = v[j*m+i]; 303 } 304 n_loc++; 305 } 306 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr); 307 } 308 } 309 } 310 311 if(!baij->donotstash){ 312 ierr = PetscFree(in_loc);CHKERRQ(ierr); 313 ierr = PetscFree(v_loc);CHKERRQ(ierr); 314 } 315 PetscFunctionReturn(0); 316 } 317 318 #undef __FUNCT__ 319 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_MatScalar" 320 PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 321 { 322 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 323 const MatScalar *value; 324 MatScalar *barray=baij->barray; 325 PetscTruth roworiented = baij->roworiented; 326 PetscErrorCode ierr; 327 PetscInt i,j,ii,jj,row,col,rstart=baij->rstart; 328 PetscInt rend=baij->rend,cstart=baij->cstart,stepval; 329 PetscInt cend=baij->cend,bs=mat->bs,bs2=baij->bs2; 330 331 PetscFunctionBegin; 332 if(!barray) { 333 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 334 baij->barray = barray; 335 } 336 337 if (roworiented) { 338 stepval = (n-1)*bs; 339 } else { 340 stepval = (m-1)*bs; 341 } 342 for (i=0; i<m; i++) { 343 if (im[i] < 0) continue; 344 #if defined(PETSC_USE_DEBUG) 345 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 346 #endif 347 if (im[i] >= rstart && im[i] < rend) { 348 row = im[i] - rstart; 349 for (j=0; j<n; j++) { 350 /* If NumCol = 1 then a copy is not required */ 351 if ((roworiented) && (n == 1)) { 352 barray = (MatScalar*) v + i*bs2; 353 } else if((!roworiented) && (m == 1)) { 354 barray = (MatScalar*) v + j*bs2; 355 } else { /* Here a copy is required */ 356 if (roworiented) { 357 value = v + i*(stepval+bs)*bs + j*bs; 358 } else { 359 value = v + j*(stepval+bs)*bs + i*bs; 360 } 361 for (ii=0; ii<bs; ii++,value+=stepval) { 362 for (jj=0; jj<bs; jj++) { 363 *barray++ = *value++; 364 } 365 } 366 barray -=bs2; 367 } 368 369 if (in[j] >= cstart && in[j] < cend){ 370 col = in[j] - cstart; 371 ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 372 } 373 else if (in[j] < 0) continue; 374 #if defined(PETSC_USE_DEBUG) 375 else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);} 376 #endif 377 else { 378 if (mat->was_assembled) { 379 if (!baij->colmap) { 380 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 381 } 382 383 #if defined(PETSC_USE_DEBUG) 384 #if defined (PETSC_USE_CTABLE) 385 { PetscInt data; 386 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 387 if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 388 } 389 #else 390 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 391 #endif 392 #endif 393 #if defined (PETSC_USE_CTABLE) 394 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 395 col = (col - 1)/bs; 396 #else 397 col = (baij->colmap[in[j]] - 1)/bs; 398 #endif 399 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 400 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 401 col = in[j]; 402 } 403 } 404 else col = in[j]; 405 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 406 } 407 } 408 } else { 409 if (!baij->donotstash) { 410 if (roworiented) { 411 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 412 } else { 413 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 414 } 415 } 416 } 417 } 418 PetscFunctionReturn(0); 419 } 420 421 #undef __FUNCT__ 422 #define __FUNCT__ "MatGetValues_MPISBAIJ" 423 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 424 { 425 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 426 PetscErrorCode ierr; 427 PetscInt bs=mat->bs,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs; 428 PetscInt bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data; 429 430 PetscFunctionBegin; 431 for (i=0; i<m; i++) { 432 if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); 433 if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1); 434 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 435 row = idxm[i] - bsrstart; 436 for (j=0; j<n; j++) { 437 if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); 438 if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1); 439 if (idxn[j] >= bscstart && idxn[j] < bscend){ 440 col = idxn[j] - bscstart; 441 ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 442 } else { 443 if (!baij->colmap) { 444 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 445 } 446 #if defined (PETSC_USE_CTABLE) 447 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 448 data --; 449 #else 450 data = baij->colmap[idxn[j]/bs]-1; 451 #endif 452 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 453 else { 454 col = data + idxn[j]%bs; 455 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 456 } 457 } 458 } 459 } else { 460 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 461 } 462 } 463 PetscFunctionReturn(0); 464 } 465 466 #undef __FUNCT__ 467 #define __FUNCT__ "MatNorm_MPISBAIJ" 468 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm) 469 { 470 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 471 PetscErrorCode ierr; 472 PetscReal sum[2],*lnorm2; 473 474 PetscFunctionBegin; 475 if (baij->size == 1) { 476 ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr); 477 } else { 478 if (type == NORM_FROBENIUS) { 479 ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr); 480 ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr); 481 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++; /* squar power of norm(A) */ 482 ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr); 483 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--; /* squar power of norm(B) */ 484 ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 485 *norm = sqrt(sum[0] + 2*sum[1]); 486 ierr = PetscFree(lnorm2);CHKERRQ(ierr); 487 } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */ 488 Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data; 489 Mat_SeqBAIJ *bmat=(Mat_SeqBAIJ*)baij->B->data; 490 PetscReal *rsum,*rsum2,vabs; 491 PetscInt *jj,*garray=baij->garray,rstart=baij->rstart,nz; 492 PetscInt brow,bcol,col,bs=baij->A->bs,row,grow,gcol,mbs=amat->mbs; 493 MatScalar *v; 494 495 ierr = PetscMalloc((2*mat->N+1)*sizeof(PetscReal),&rsum);CHKERRQ(ierr); 496 rsum2 = rsum + mat->N; 497 ierr = PetscMemzero(rsum,mat->N*sizeof(PetscReal));CHKERRQ(ierr); 498 /* Amat */ 499 v = amat->a; jj = amat->j; 500 for (brow=0; brow<mbs; brow++) { 501 grow = bs*(rstart + brow); 502 nz = amat->i[brow+1] - amat->i[brow]; 503 for (bcol=0; bcol<nz; bcol++){ 504 gcol = bs*(rstart + *jj); jj++; 505 for (col=0; col<bs; col++){ 506 for (row=0; row<bs; row++){ 507 vabs = PetscAbsScalar(*v); v++; 508 rsum[gcol+col] += vabs; 509 /* non-diagonal block */ 510 if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs; 511 } 512 } 513 } 514 } 515 /* Bmat */ 516 v = bmat->a; jj = bmat->j; 517 for (brow=0; brow<mbs; brow++) { 518 grow = bs*(rstart + brow); 519 nz = bmat->i[brow+1] - bmat->i[brow]; 520 for (bcol=0; bcol<nz; bcol++){ 521 gcol = bs*garray[*jj]; jj++; 522 for (col=0; col<bs; col++){ 523 for (row=0; row<bs; row++){ 524 vabs = PetscAbsScalar(*v); v++; 525 rsum[gcol+col] += vabs; 526 rsum[grow+row] += vabs; 527 } 528 } 529 } 530 } 531 ierr = MPI_Allreduce(rsum,rsum2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 532 *norm = 0.0; 533 for (col=0; col<mat->N; col++) { 534 if (rsum2[col] > *norm) *norm = rsum2[col]; 535 } 536 ierr = PetscFree(rsum);CHKERRQ(ierr); 537 } else { 538 SETERRQ(PETSC_ERR_SUP,"No support for this norm yet"); 539 } 540 } 541 PetscFunctionReturn(0); 542 } 543 544 #undef __FUNCT__ 545 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ" 546 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode) 547 { 548 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 549 PetscErrorCode ierr; 550 PetscInt nstash,reallocs; 551 InsertMode addv; 552 553 PetscFunctionBegin; 554 if (baij->donotstash) { 555 PetscFunctionReturn(0); 556 } 557 558 /* make sure all processors are either in INSERTMODE or ADDMODE */ 559 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 560 if (addv == (ADD_VALUES|INSERT_VALUES)) { 561 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 562 } 563 mat->insertmode = addv; /* in case this processor had no cache */ 564 565 ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr); 566 ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr); 567 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 568 ierr = PetscLogInfo((0,"MatAssemblyBegin_MPISBAIJ:Stash has %D entries,uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr); 569 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 570 ierr = PetscLogInfo((0,"MatAssemblyBegin_MPISBAIJ:Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr); 571 PetscFunctionReturn(0); 572 } 573 574 #undef __FUNCT__ 575 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ" 576 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode) 577 { 578 Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data; 579 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)baij->A->data; 580 Mat_SeqBAIJ *b=(Mat_SeqBAIJ*)baij->B->data; 581 PetscErrorCode ierr; 582 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 583 PetscInt *row,*col,other_disassembled; 584 PetscMPIInt n; 585 PetscTruth r1,r2,r3; 586 MatScalar *val; 587 InsertMode addv = mat->insertmode; 588 589 PetscFunctionBegin; 590 591 if (!baij->donotstash) { 592 while (1) { 593 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 594 if (!flg) break; 595 596 for (i=0; i<n;) { 597 /* Now identify the consecutive vals belonging to the same row */ 598 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 599 if (j < n) ncols = j-i; 600 else ncols = n-i; 601 /* Now assemble all these values with a single function call */ 602 ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 603 i = j; 604 } 605 } 606 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 607 /* Now process the block-stash. Since the values are stashed column-oriented, 608 set the roworiented flag to column oriented, and after MatSetValues() 609 restore the original flags */ 610 r1 = baij->roworiented; 611 r2 = a->roworiented; 612 r3 = b->roworiented; 613 baij->roworiented = PETSC_FALSE; 614 a->roworiented = PETSC_FALSE; 615 b->roworiented = PETSC_FALSE; 616 while (1) { 617 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 618 if (!flg) break; 619 620 for (i=0; i<n;) { 621 /* Now identify the consecutive vals belonging to the same row */ 622 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 623 if (j < n) ncols = j-i; 624 else ncols = n-i; 625 ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 626 i = j; 627 } 628 } 629 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 630 baij->roworiented = r1; 631 a->roworiented = r2; 632 b->roworiented = r3; 633 } 634 635 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 636 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 637 638 /* determine if any processor has disassembled, if so we must 639 also disassemble ourselfs, in order that we may reassemble. */ 640 /* 641 if nonzero structure of submatrix B cannot change then we know that 642 no processor disassembled thus we can skip this stuff 643 */ 644 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 645 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 646 if (mat->was_assembled && !other_disassembled) { 647 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 648 } 649 } 650 651 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 652 ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */ 653 } 654 b->compressedrow.use = PETSC_TRUE; 655 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 656 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 657 658 if (baij->rowvalues) { 659 ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr); 660 baij->rowvalues = 0; 661 } 662 663 PetscFunctionReturn(0); 664 } 665 666 #undef __FUNCT__ 667 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket" 668 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 669 { 670 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 671 PetscErrorCode ierr; 672 PetscInt bs = mat->bs; 673 PetscMPIInt size = baij->size,rank = baij->rank; 674 PetscTruth iascii,isdraw; 675 PetscViewer sviewer; 676 PetscViewerFormat format; 677 678 PetscFunctionBegin; 679 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 680 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 681 if (iascii) { 682 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 683 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 684 MatInfo info; 685 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 686 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 687 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n", 688 rank,mat->m,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs, 689 mat->bs,(PetscInt)info.memory);CHKERRQ(ierr); 690 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 691 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr); 692 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 693 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr); 694 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 695 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 696 PetscFunctionReturn(0); 697 } else if (format == PETSC_VIEWER_ASCII_INFO) { 698 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 699 PetscFunctionReturn(0); 700 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 701 PetscFunctionReturn(0); 702 } 703 } 704 705 if (isdraw) { 706 PetscDraw draw; 707 PetscTruth isnull; 708 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 709 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 710 } 711 712 if (size == 1) { 713 ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr); 714 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 715 } else { 716 /* assemble the entire matrix onto first processor. */ 717 Mat A; 718 Mat_SeqSBAIJ *Aloc; 719 Mat_SeqBAIJ *Bloc; 720 PetscInt M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 721 MatScalar *a; 722 723 /* Should this be the same type as mat? */ 724 ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr); 725 if (!rank) { 726 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 727 } else { 728 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 729 } 730 ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 731 ierr = MatMPISBAIJSetPreallocation(A,mat->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 732 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 733 734 /* copy over the A part */ 735 Aloc = (Mat_SeqSBAIJ*)baij->A->data; 736 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 737 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 738 739 for (i=0; i<mbs; i++) { 740 rvals[0] = bs*(baij->rstart + i); 741 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 742 for (j=ai[i]; j<ai[i+1]; j++) { 743 col = (baij->cstart+aj[j])*bs; 744 for (k=0; k<bs; k++) { 745 ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 746 col++; a += bs; 747 } 748 } 749 } 750 /* copy over the B part */ 751 Bloc = (Mat_SeqBAIJ*)baij->B->data; 752 ai = Bloc->i; aj = Bloc->j; a = Bloc->a; 753 for (i=0; i<mbs; i++) { 754 rvals[0] = bs*(baij->rstart + i); 755 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 756 for (j=ai[i]; j<ai[i+1]; j++) { 757 col = baij->garray[aj[j]]*bs; 758 for (k=0; k<bs; k++) { 759 ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 760 col++; a += bs; 761 } 762 } 763 } 764 ierr = PetscFree(rvals);CHKERRQ(ierr); 765 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 766 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 767 /* 768 Everyone has to call to draw the matrix since the graphics waits are 769 synchronized across all processors that share the PetscDraw object 770 */ 771 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 772 if (!rank) { 773 ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 774 ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 775 } 776 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 777 ierr = MatDestroy(A);CHKERRQ(ierr); 778 } 779 PetscFunctionReturn(0); 780 } 781 782 #undef __FUNCT__ 783 #define __FUNCT__ "MatView_MPISBAIJ" 784 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer) 785 { 786 PetscErrorCode ierr; 787 PetscTruth iascii,isdraw,issocket,isbinary; 788 789 PetscFunctionBegin; 790 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 791 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 792 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 793 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 794 if (iascii || isdraw || issocket || isbinary) { 795 ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 796 } else { 797 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name); 798 } 799 PetscFunctionReturn(0); 800 } 801 802 #undef __FUNCT__ 803 #define __FUNCT__ "MatDestroy_MPISBAIJ" 804 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat) 805 { 806 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 807 PetscErrorCode ierr; 808 809 PetscFunctionBegin; 810 #if defined(PETSC_USE_LOG) 811 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->M,mat->N); 812 #endif 813 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 814 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 815 ierr = PetscFree(baij->rowners);CHKERRQ(ierr); 816 ierr = MatDestroy(baij->A);CHKERRQ(ierr); 817 ierr = MatDestroy(baij->B);CHKERRQ(ierr); 818 #if defined (PETSC_USE_CTABLE) 819 if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);} 820 #else 821 if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);} 822 #endif 823 if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);} 824 if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 825 if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 826 if (baij->slvec0) { 827 ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr); 828 ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr); 829 } 830 if (baij->slvec1) { 831 ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr); 832 ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr); 833 ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr); 834 } 835 if (baij->sMvctx) {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);} 836 if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);} 837 if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);} 838 if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);} 839 #if defined(PETSC_USE_MAT_SINGLE) 840 if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);} 841 #endif 842 ierr = PetscFree(baij);CHKERRQ(ierr); 843 844 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 845 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 846 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 847 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 848 PetscFunctionReturn(0); 849 } 850 851 #undef __FUNCT__ 852 #define __FUNCT__ "MatMult_MPISBAIJ" 853 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy) 854 { 855 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 856 PetscErrorCode ierr; 857 PetscInt nt,mbs=a->mbs,bs=A->bs; 858 PetscScalar *x,*from,zero=0.0; 859 860 PetscFunctionBegin; 861 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 862 if (nt != A->n) { 863 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 864 } 865 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 866 if (nt != A->m) { 867 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 868 } 869 870 /* diagonal part */ 871 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 872 ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr); 873 874 /* subdiagonal part */ 875 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 876 877 /* copy x into the vec slvec0 */ 878 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 879 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 880 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 881 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 882 883 ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr); 884 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 885 ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr); 886 887 /* supperdiagonal part */ 888 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 889 890 PetscFunctionReturn(0); 891 } 892 893 #undef __FUNCT__ 894 #define __FUNCT__ "MatMult_MPISBAIJ_2comm" 895 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy) 896 { 897 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 898 PetscErrorCode ierr; 899 PetscInt nt; 900 901 PetscFunctionBegin; 902 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 903 if (nt != A->n) { 904 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 905 } 906 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 907 if (nt != A->m) { 908 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 909 } 910 911 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 912 /* do diagonal part */ 913 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 914 /* do supperdiagonal part */ 915 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 916 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 917 /* do subdiagonal part */ 918 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 919 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 920 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 921 922 PetscFunctionReturn(0); 923 } 924 925 #undef __FUNCT__ 926 #define __FUNCT__ "MatMultAdd_MPISBAIJ" 927 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 928 { 929 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 930 PetscErrorCode ierr; 931 PetscInt mbs=a->mbs,bs=A->bs; 932 PetscScalar *x,*from,zero=0.0; 933 934 PetscFunctionBegin; 935 /* 936 PetscSynchronizedPrintf(A->comm," MatMultAdd is called ...\n"); 937 PetscSynchronizedFlush(A->comm); 938 */ 939 /* diagonal part */ 940 ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr); 941 ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr); 942 943 /* subdiagonal part */ 944 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 945 946 /* copy x into the vec slvec0 */ 947 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 948 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 949 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 950 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 951 952 ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr); 953 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 954 ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr); 955 956 /* supperdiagonal part */ 957 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr); 958 959 PetscFunctionReturn(0); 960 } 961 962 #undef __FUNCT__ 963 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm" 964 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz) 965 { 966 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 967 PetscErrorCode ierr; 968 969 PetscFunctionBegin; 970 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 971 /* do diagonal part */ 972 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 973 /* do supperdiagonal part */ 974 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 975 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 976 977 /* do subdiagonal part */ 978 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 979 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 980 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 981 982 PetscFunctionReturn(0); 983 } 984 985 /* 986 This only works correctly for square matrices where the subblock A->A is the 987 diagonal block 988 */ 989 #undef __FUNCT__ 990 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ" 991 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v) 992 { 993 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 994 PetscErrorCode ierr; 995 996 PetscFunctionBegin; 997 /* if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */ 998 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 999 PetscFunctionReturn(0); 1000 } 1001 1002 #undef __FUNCT__ 1003 #define __FUNCT__ "MatScale_MPISBAIJ" 1004 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa) 1005 { 1006 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1007 PetscErrorCode ierr; 1008 1009 PetscFunctionBegin; 1010 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1011 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1012 PetscFunctionReturn(0); 1013 } 1014 1015 #undef __FUNCT__ 1016 #define __FUNCT__ "MatGetRow_MPISBAIJ" 1017 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1018 { 1019 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 1020 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1021 PetscErrorCode ierr; 1022 PetscInt bs = matin->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1023 PetscInt nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs; 1024 PetscInt *cmap,*idx_p,cstart = mat->cstart; 1025 1026 PetscFunctionBegin; 1027 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1028 mat->getrowactive = PETSC_TRUE; 1029 1030 if (!mat->rowvalues && (idx || v)) { 1031 /* 1032 allocate enough space to hold information from the longest row. 1033 */ 1034 Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data; 1035 Mat_SeqBAIJ *Ba = (Mat_SeqBAIJ*)mat->B->data; 1036 PetscInt max = 1,mbs = mat->mbs,tmp; 1037 for (i=0; i<mbs; i++) { 1038 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */ 1039 if (max < tmp) { max = tmp; } 1040 } 1041 ierr = PetscMalloc(max*bs2*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1042 mat->rowindices = (PetscInt*)(mat->rowvalues + max*bs2); 1043 } 1044 1045 if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows") 1046 lrow = row - brstart; /* local row index */ 1047 1048 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1049 if (!v) {pvA = 0; pvB = 0;} 1050 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1051 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1052 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1053 nztot = nzA + nzB; 1054 1055 cmap = mat->garray; 1056 if (v || idx) { 1057 if (nztot) { 1058 /* Sort by increasing column numbers, assuming A and B already sorted */ 1059 PetscInt imark = -1; 1060 if (v) { 1061 *v = v_p = mat->rowvalues; 1062 for (i=0; i<nzB; i++) { 1063 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1064 else break; 1065 } 1066 imark = i; 1067 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1068 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1069 } 1070 if (idx) { 1071 *idx = idx_p = mat->rowindices; 1072 if (imark > -1) { 1073 for (i=0; i<imark; i++) { 1074 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1075 } 1076 } else { 1077 for (i=0; i<nzB; i++) { 1078 if (cmap[cworkB[i]/bs] < cstart) 1079 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1080 else break; 1081 } 1082 imark = i; 1083 } 1084 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1085 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1086 } 1087 } else { 1088 if (idx) *idx = 0; 1089 if (v) *v = 0; 1090 } 1091 } 1092 *nz = nztot; 1093 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1094 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1095 PetscFunctionReturn(0); 1096 } 1097 1098 #undef __FUNCT__ 1099 #define __FUNCT__ "MatRestoreRow_MPISBAIJ" 1100 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1101 { 1102 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1103 1104 PetscFunctionBegin; 1105 if (!baij->getrowactive) { 1106 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1107 } 1108 baij->getrowactive = PETSC_FALSE; 1109 PetscFunctionReturn(0); 1110 } 1111 1112 #undef __FUNCT__ 1113 #define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ" 1114 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A) 1115 { 1116 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1117 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1118 1119 PetscFunctionBegin; 1120 aA->getrow_utriangular = PETSC_TRUE; 1121 PetscFunctionReturn(0); 1122 } 1123 #undef __FUNCT__ 1124 #define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ" 1125 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A) 1126 { 1127 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1128 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1129 1130 PetscFunctionBegin; 1131 aA->getrow_utriangular = PETSC_FALSE; 1132 PetscFunctionReturn(0); 1133 } 1134 1135 #undef __FUNCT__ 1136 #define __FUNCT__ "MatRealPart_MPISBAIJ" 1137 PetscErrorCode MatRealPart_MPISBAIJ(Mat A) 1138 { 1139 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1140 PetscErrorCode ierr; 1141 1142 PetscFunctionBegin; 1143 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1144 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1145 PetscFunctionReturn(0); 1146 } 1147 1148 #undef __FUNCT__ 1149 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ" 1150 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A) 1151 { 1152 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1153 PetscErrorCode ierr; 1154 1155 PetscFunctionBegin; 1156 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1157 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1158 PetscFunctionReturn(0); 1159 } 1160 1161 #undef __FUNCT__ 1162 #define __FUNCT__ "MatZeroEntries_MPISBAIJ" 1163 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A) 1164 { 1165 Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data; 1166 PetscErrorCode ierr; 1167 1168 PetscFunctionBegin; 1169 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1170 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1171 PetscFunctionReturn(0); 1172 } 1173 1174 #undef __FUNCT__ 1175 #define __FUNCT__ "MatGetInfo_MPISBAIJ" 1176 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1177 { 1178 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data; 1179 Mat A = a->A,B = a->B; 1180 PetscErrorCode ierr; 1181 PetscReal isend[5],irecv[5]; 1182 1183 PetscFunctionBegin; 1184 info->block_size = (PetscReal)matin->bs; 1185 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1186 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1187 isend[3] = info->memory; isend[4] = info->mallocs; 1188 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1189 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1190 isend[3] += info->memory; isend[4] += info->mallocs; 1191 if (flag == MAT_LOCAL) { 1192 info->nz_used = isend[0]; 1193 info->nz_allocated = isend[1]; 1194 info->nz_unneeded = isend[2]; 1195 info->memory = isend[3]; 1196 info->mallocs = isend[4]; 1197 } else if (flag == MAT_GLOBAL_MAX) { 1198 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1199 info->nz_used = irecv[0]; 1200 info->nz_allocated = irecv[1]; 1201 info->nz_unneeded = irecv[2]; 1202 info->memory = irecv[3]; 1203 info->mallocs = irecv[4]; 1204 } else if (flag == MAT_GLOBAL_SUM) { 1205 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1206 info->nz_used = irecv[0]; 1207 info->nz_allocated = irecv[1]; 1208 info->nz_unneeded = irecv[2]; 1209 info->memory = irecv[3]; 1210 info->mallocs = irecv[4]; 1211 } else { 1212 SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1213 } 1214 info->rows_global = (PetscReal)A->M; 1215 info->columns_global = (PetscReal)A->N; 1216 info->rows_local = (PetscReal)A->m; 1217 info->columns_local = (PetscReal)A->N; 1218 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1219 info->fill_ratio_needed = 0; 1220 info->factor_mallocs = 0; 1221 PetscFunctionReturn(0); 1222 } 1223 1224 #undef __FUNCT__ 1225 #define __FUNCT__ "MatSetOption_MPISBAIJ" 1226 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op) 1227 { 1228 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1229 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1230 PetscErrorCode ierr; 1231 1232 PetscFunctionBegin; 1233 switch (op) { 1234 case MAT_NO_NEW_NONZERO_LOCATIONS: 1235 case MAT_YES_NEW_NONZERO_LOCATIONS: 1236 case MAT_COLUMNS_UNSORTED: 1237 case MAT_COLUMNS_SORTED: 1238 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1239 case MAT_KEEP_ZEROED_ROWS: 1240 case MAT_NEW_NONZERO_LOCATION_ERR: 1241 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1242 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1243 break; 1244 case MAT_ROW_ORIENTED: 1245 a->roworiented = PETSC_TRUE; 1246 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1247 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1248 break; 1249 case MAT_ROWS_SORTED: 1250 case MAT_ROWS_UNSORTED: 1251 case MAT_YES_NEW_DIAGONALS: 1252 ierr = PetscLogInfo((A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"));CHKERRQ(ierr); 1253 break; 1254 case MAT_COLUMN_ORIENTED: 1255 a->roworiented = PETSC_FALSE; 1256 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1257 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1258 break; 1259 case MAT_IGNORE_OFF_PROC_ENTRIES: 1260 a->donotstash = PETSC_TRUE; 1261 break; 1262 case MAT_NO_NEW_DIAGONALS: 1263 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1264 case MAT_USE_HASH_TABLE: 1265 a->ht_flag = PETSC_TRUE; 1266 break; 1267 case MAT_NOT_SYMMETRIC: 1268 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 1269 case MAT_HERMITIAN: 1270 SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric"); 1271 case MAT_SYMMETRIC: 1272 case MAT_STRUCTURALLY_SYMMETRIC: 1273 case MAT_NOT_HERMITIAN: 1274 case MAT_SYMMETRY_ETERNAL: 1275 case MAT_NOT_SYMMETRY_ETERNAL: 1276 break; 1277 case MAT_IGNORE_LOWER_TRIANGULAR: 1278 aA->ignore_ltriangular = PETSC_TRUE; 1279 break; 1280 case MAT_ERROR_LOWER_TRIANGULAR: 1281 aA->ignore_ltriangular = PETSC_FALSE; 1282 break; 1283 case MAT_GETROW_UPPERTRIANGULAR: 1284 aA->getrow_utriangular = PETSC_TRUE; 1285 break; 1286 default: 1287 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1288 } 1289 PetscFunctionReturn(0); 1290 } 1291 1292 #undef __FUNCT__ 1293 #define __FUNCT__ "MatTranspose_MPISBAIJ" 1294 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,Mat *B) 1295 { 1296 PetscErrorCode ierr; 1297 PetscFunctionBegin; 1298 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 1299 PetscFunctionReturn(0); 1300 } 1301 1302 #undef __FUNCT__ 1303 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ" 1304 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr) 1305 { 1306 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1307 Mat a=baij->A, b=baij->B; 1308 PetscErrorCode ierr; 1309 PetscInt nv,m,n; 1310 PetscTruth flg; 1311 1312 PetscFunctionBegin; 1313 if (ll != rr){ 1314 ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr); 1315 if (!flg) 1316 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n"); 1317 } 1318 if (!ll) PetscFunctionReturn(0); 1319 1320 ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr); 1321 if (m != n) SETERRQ2(PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n); 1322 1323 ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr); 1324 if (nv!=n) SETERRQ(PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size"); 1325 1326 ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1327 1328 /* left diagonalscale the off-diagonal part */ 1329 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1330 1331 /* scale the diagonal part */ 1332 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1333 1334 /* right diagonalscale the off-diagonal part */ 1335 ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1336 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1337 PetscFunctionReturn(0); 1338 } 1339 1340 #undef __FUNCT__ 1341 #define __FUNCT__ "MatPrintHelp_MPISBAIJ" 1342 PetscErrorCode MatPrintHelp_MPISBAIJ(Mat A) 1343 { 1344 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1345 MPI_Comm comm = A->comm; 1346 static PetscTruth called = PETSC_FALSE; 1347 PetscErrorCode ierr; 1348 1349 PetscFunctionBegin; 1350 if (!a->rank) { 1351 ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr); 1352 } 1353 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1354 ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr); 1355 ierr = (*PetscHelpPrintf)(comm," -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr); 1356 PetscFunctionReturn(0); 1357 } 1358 1359 #undef __FUNCT__ 1360 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ" 1361 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A) 1362 { 1363 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1364 PetscErrorCode ierr; 1365 1366 PetscFunctionBegin; 1367 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1368 PetscFunctionReturn(0); 1369 } 1370 1371 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *); 1372 1373 #undef __FUNCT__ 1374 #define __FUNCT__ "MatEqual_MPISBAIJ" 1375 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag) 1376 { 1377 Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data; 1378 Mat a,b,c,d; 1379 PetscTruth flg; 1380 PetscErrorCode ierr; 1381 1382 PetscFunctionBegin; 1383 a = matA->A; b = matA->B; 1384 c = matB->A; d = matB->B; 1385 1386 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1387 if (flg) { 1388 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1389 } 1390 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1391 PetscFunctionReturn(0); 1392 } 1393 1394 #undef __FUNCT__ 1395 #define __FUNCT__ "MatCopy_MPISBAIJ" 1396 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str) 1397 { 1398 PetscErrorCode ierr; 1399 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1400 Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data; 1401 1402 PetscFunctionBegin; 1403 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1404 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1405 ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr); 1406 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1407 ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr); 1408 } else { 1409 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1410 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1411 } 1412 PetscFunctionReturn(0); 1413 } 1414 1415 #undef __FUNCT__ 1416 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ" 1417 PetscErrorCode MatSetUpPreallocation_MPISBAIJ(Mat A) 1418 { 1419 PetscErrorCode ierr; 1420 1421 PetscFunctionBegin; 1422 ierr = MatMPISBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1423 PetscFunctionReturn(0); 1424 } 1425 1426 #include "petscblaslapack.h" 1427 #undef __FUNCT__ 1428 #define __FUNCT__ "MatAXPY_MPISBAIJ" 1429 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1430 { 1431 PetscErrorCode ierr; 1432 Mat_MPISBAIJ *xx=(Mat_MPISBAIJ *)X->data,*yy=(Mat_MPISBAIJ *)Y->data; 1433 PetscBLASInt bnz,one=1; 1434 Mat_SeqSBAIJ *xa,*ya; 1435 Mat_SeqBAIJ *xb,*yb; 1436 1437 PetscFunctionBegin; 1438 if (str == SAME_NONZERO_PATTERN) { 1439 PetscScalar alpha = a; 1440 xa = (Mat_SeqSBAIJ *)xx->A->data; 1441 ya = (Mat_SeqSBAIJ *)yy->A->data; 1442 bnz = (PetscBLASInt)xa->nz; 1443 BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one); 1444 xb = (Mat_SeqBAIJ *)xx->B->data; 1445 yb = (Mat_SeqBAIJ *)yy->B->data; 1446 bnz = (PetscBLASInt)xb->nz; 1447 BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one); 1448 } else { 1449 ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr); 1450 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1451 ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr); 1452 } 1453 PetscFunctionReturn(0); 1454 } 1455 1456 #undef __FUNCT__ 1457 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ" 1458 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[]) 1459 { 1460 PetscErrorCode ierr; 1461 PetscInt i; 1462 PetscTruth flg; 1463 1464 PetscFunctionBegin; 1465 for (i=0; i<n; i++) { 1466 ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr); 1467 if (!flg) { 1468 SETERRQ(PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices"); 1469 } 1470 } 1471 ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr); 1472 PetscFunctionReturn(0); 1473 } 1474 1475 1476 /* -------------------------------------------------------------------*/ 1477 static struct _MatOps MatOps_Values = { 1478 MatSetValues_MPISBAIJ, 1479 MatGetRow_MPISBAIJ, 1480 MatRestoreRow_MPISBAIJ, 1481 MatMult_MPISBAIJ, 1482 /* 4*/ MatMultAdd_MPISBAIJ, 1483 MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */ 1484 MatMultAdd_MPISBAIJ, 1485 0, 1486 0, 1487 0, 1488 /*10*/ 0, 1489 0, 1490 0, 1491 MatRelax_MPISBAIJ, 1492 MatTranspose_MPISBAIJ, 1493 /*15*/ MatGetInfo_MPISBAIJ, 1494 MatEqual_MPISBAIJ, 1495 MatGetDiagonal_MPISBAIJ, 1496 MatDiagonalScale_MPISBAIJ, 1497 MatNorm_MPISBAIJ, 1498 /*20*/ MatAssemblyBegin_MPISBAIJ, 1499 MatAssemblyEnd_MPISBAIJ, 1500 0, 1501 MatSetOption_MPISBAIJ, 1502 MatZeroEntries_MPISBAIJ, 1503 /*25*/ 0, 1504 0, 1505 0, 1506 0, 1507 0, 1508 /*30*/ MatSetUpPreallocation_MPISBAIJ, 1509 0, 1510 0, 1511 0, 1512 0, 1513 /*35*/ MatDuplicate_MPISBAIJ, 1514 0, 1515 0, 1516 0, 1517 0, 1518 /*40*/ MatAXPY_MPISBAIJ, 1519 MatGetSubMatrices_MPISBAIJ, 1520 MatIncreaseOverlap_MPISBAIJ, 1521 MatGetValues_MPISBAIJ, 1522 MatCopy_MPISBAIJ, 1523 /*45*/ MatPrintHelp_MPISBAIJ, 1524 MatScale_MPISBAIJ, 1525 0, 1526 0, 1527 0, 1528 /*50*/ 0, 1529 0, 1530 0, 1531 0, 1532 0, 1533 /*55*/ 0, 1534 0, 1535 MatSetUnfactored_MPISBAIJ, 1536 0, 1537 MatSetValuesBlocked_MPISBAIJ, 1538 /*60*/ 0, 1539 0, 1540 0, 1541 MatGetPetscMaps_Petsc, 1542 0, 1543 /*65*/ 0, 1544 0, 1545 0, 1546 0, 1547 0, 1548 /*70*/ MatGetRowMax_MPISBAIJ, 1549 0, 1550 0, 1551 0, 1552 0, 1553 /*75*/ 0, 1554 0, 1555 0, 1556 0, 1557 0, 1558 /*80*/ 0, 1559 0, 1560 0, 1561 0, 1562 MatLoad_MPISBAIJ, 1563 /*85*/ 0, 1564 0, 1565 0, 1566 0, 1567 0, 1568 /*90*/ 0, 1569 0, 1570 0, 1571 0, 1572 0, 1573 /*95*/ 0, 1574 0, 1575 0, 1576 0, 1577 0, 1578 /*100*/0, 1579 0, 1580 0, 1581 0, 1582 0, 1583 /*105*/0, 1584 MatRealPart_MPISBAIJ, 1585 MatImaginaryPart_MPISBAIJ, 1586 MatGetRowUpperTriangular_MPISBAIJ, 1587 MatRestoreRowUpperTriangular_MPISBAIJ 1588 }; 1589 1590 1591 EXTERN_C_BEGIN 1592 #undef __FUNCT__ 1593 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ" 1594 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 1595 { 1596 PetscFunctionBegin; 1597 *a = ((Mat_MPISBAIJ *)A->data)->A; 1598 *iscopy = PETSC_FALSE; 1599 PetscFunctionReturn(0); 1600 } 1601 EXTERN_C_END 1602 1603 EXTERN_C_BEGIN 1604 #undef __FUNCT__ 1605 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ" 1606 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz) 1607 { 1608 Mat_MPISBAIJ *b; 1609 PetscErrorCode ierr; 1610 PetscInt i,mbs,Mbs; 1611 1612 PetscFunctionBegin; 1613 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1614 1615 if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive"); 1616 if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3; 1617 if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1; 1618 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 1619 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 1620 if (d_nnz) { 1621 for (i=0; i<B->m/bs; i++) { 1622 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 1623 } 1624 } 1625 if (o_nnz) { 1626 for (i=0; i<B->m/bs; i++) { 1627 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 1628 } 1629 } 1630 B->preallocated = PETSC_TRUE; 1631 ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr); 1632 ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr); 1633 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1634 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->cmap);CHKERRQ(ierr); 1635 1636 b = (Mat_MPISBAIJ*)B->data; 1637 mbs = B->m/bs; 1638 Mbs = B->M/bs; 1639 if (mbs*bs != B->m) { 1640 SETERRQ2(PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->m,bs); 1641 } 1642 1643 B->bs = bs; 1644 b->bs2 = bs*bs; 1645 b->mbs = mbs; 1646 b->nbs = mbs; 1647 b->Mbs = Mbs; 1648 b->Nbs = Mbs; 1649 1650 ierr = MPI_Allgather(&b->mbs,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr); 1651 b->rowners[0] = 0; 1652 for (i=2; i<=b->size; i++) { 1653 b->rowners[i] += b->rowners[i-1]; 1654 } 1655 b->rstart = b->rowners[b->rank]; 1656 b->rend = b->rowners[b->rank+1]; 1657 b->cstart = b->rstart; 1658 b->cend = b->rend; 1659 for (i=0; i<=b->size; i++) { 1660 b->rowners_bs[i] = b->rowners[i]*bs; 1661 } 1662 b->rstart_bs = b-> rstart*bs; 1663 b->rend_bs = b->rend*bs; 1664 1665 b->cstart_bs = b->cstart*bs; 1666 b->cend_bs = b->cend*bs; 1667 1668 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 1669 ierr = MatSetSizes(b->A,B->m,B->m,B->m,B->m);CHKERRQ(ierr); 1670 ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr); 1671 ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 1672 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 1673 1674 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 1675 ierr = MatSetSizes(b->B,B->m,B->M,B->m,B->M);CHKERRQ(ierr); 1676 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 1677 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 1678 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 1679 1680 /* build cache for off array entries formed */ 1681 ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr); 1682 1683 PetscFunctionReturn(0); 1684 } 1685 EXTERN_C_END 1686 1687 /*MC 1688 MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices, 1689 based on block compressed sparse row format. Only the upper triangular portion of the matrix is stored. 1690 1691 Options Database Keys: 1692 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions() 1693 1694 Level: beginner 1695 1696 .seealso: MatCreateMPISBAIJ 1697 M*/ 1698 1699 EXTERN_C_BEGIN 1700 #undef __FUNCT__ 1701 #define __FUNCT__ "MatCreate_MPISBAIJ" 1702 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPISBAIJ(Mat B) 1703 { 1704 Mat_MPISBAIJ *b; 1705 PetscErrorCode ierr; 1706 PetscTruth flg; 1707 1708 PetscFunctionBegin; 1709 1710 ierr = PetscNew(Mat_MPISBAIJ,&b);CHKERRQ(ierr); 1711 B->data = (void*)b; 1712 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1713 1714 B->ops->destroy = MatDestroy_MPISBAIJ; 1715 B->ops->view = MatView_MPISBAIJ; 1716 B->mapping = 0; 1717 B->factor = 0; 1718 B->assembled = PETSC_FALSE; 1719 1720 B->insertmode = NOT_SET_VALUES; 1721 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 1722 ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr); 1723 1724 /* build local table of row and column ownerships */ 1725 ierr = PetscMalloc(3*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr); 1726 b->cowners = b->rowners + b->size + 2; 1727 b->rowners_bs = b->cowners + b->size + 2; 1728 ierr = PetscLogObjectMemory(B,3*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ));CHKERRQ(ierr); 1729 1730 /* build cache for off array entries formed */ 1731 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 1732 b->donotstash = PETSC_FALSE; 1733 b->colmap = PETSC_NULL; 1734 b->garray = PETSC_NULL; 1735 b->roworiented = PETSC_TRUE; 1736 1737 #if defined(PETSC_USE_MAT_SINGLE) 1738 /* stuff for MatSetValues_XXX in single precision */ 1739 b->setvalueslen = 0; 1740 b->setvaluescopy = PETSC_NULL; 1741 #endif 1742 1743 /* stuff used in block assembly */ 1744 b->barray = 0; 1745 1746 /* stuff used for matrix vector multiply */ 1747 b->lvec = 0; 1748 b->Mvctx = 0; 1749 b->slvec0 = 0; 1750 b->slvec0b = 0; 1751 b->slvec1 = 0; 1752 b->slvec1a = 0; 1753 b->slvec1b = 0; 1754 b->sMvctx = 0; 1755 1756 /* stuff for MatGetRow() */ 1757 b->rowindices = 0; 1758 b->rowvalues = 0; 1759 b->getrowactive = PETSC_FALSE; 1760 1761 /* hash table stuff */ 1762 b->ht = 0; 1763 b->hd = 0; 1764 b->ht_size = 0; 1765 b->ht_flag = PETSC_FALSE; 1766 b->ht_fact = 0; 1767 b->ht_total_ct = 0; 1768 b->ht_insert_ct = 0; 1769 1770 ierr = PetscOptionsHasName(B->prefix,"-mat_use_hash_table",&flg);CHKERRQ(ierr); 1771 if (flg) { 1772 PetscReal fact = 1.39; 1773 ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr); 1774 ierr = PetscOptionsGetReal(B->prefix,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr); 1775 if (fact <= 1.0) fact = 1.39; 1776 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 1777 ierr = PetscLogInfo((0,"MatCreateMPISBAIJ:Hash table Factor used %5.2f\n",fact));CHKERRQ(ierr); 1778 } 1779 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1780 "MatStoreValues_MPISBAIJ", 1781 MatStoreValues_MPISBAIJ);CHKERRQ(ierr); 1782 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1783 "MatRetrieveValues_MPISBAIJ", 1784 MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr); 1785 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 1786 "MatGetDiagonalBlock_MPISBAIJ", 1787 MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr); 1788 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C", 1789 "MatMPISBAIJSetPreallocation_MPISBAIJ", 1790 MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr); 1791 B->symmetric = PETSC_TRUE; 1792 B->structurally_symmetric = PETSC_TRUE; 1793 B->symmetric_set = PETSC_TRUE; 1794 B->structurally_symmetric_set = PETSC_TRUE; 1795 PetscFunctionReturn(0); 1796 } 1797 EXTERN_C_END 1798 1799 /*MC 1800 MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices. 1801 1802 This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator, 1803 and MATMPISBAIJ otherwise. 1804 1805 Options Database Keys: 1806 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions() 1807 1808 Level: beginner 1809 1810 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ 1811 M*/ 1812 1813 EXTERN_C_BEGIN 1814 #undef __FUNCT__ 1815 #define __FUNCT__ "MatCreate_SBAIJ" 1816 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SBAIJ(Mat A) 1817 { 1818 PetscErrorCode ierr; 1819 PetscMPIInt size; 1820 1821 PetscFunctionBegin; 1822 ierr = PetscObjectChangeTypeName((PetscObject)A,MATSBAIJ);CHKERRQ(ierr); 1823 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 1824 if (size == 1) { 1825 ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr); 1826 } else { 1827 ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 1828 } 1829 PetscFunctionReturn(0); 1830 } 1831 EXTERN_C_END 1832 1833 #undef __FUNCT__ 1834 #define __FUNCT__ "MatMPISBAIJSetPreallocation" 1835 /*@C 1836 MatMPISBAIJSetPreallocation - For good matrix assembly performance 1837 the user should preallocate the matrix storage by setting the parameters 1838 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1839 performance can be increased by more than a factor of 50. 1840 1841 Collective on Mat 1842 1843 Input Parameters: 1844 + A - the matrix 1845 . bs - size of blockk 1846 . d_nz - number of block nonzeros per block row in diagonal portion of local 1847 submatrix (same for all local rows) 1848 . d_nnz - array containing the number of block nonzeros in the various block rows 1849 in the upper triangular and diagonal part of the in diagonal portion of the local 1850 (possibly different for each block row) or PETSC_NULL. You must leave room 1851 for the diagonal entry even if it is zero. 1852 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1853 submatrix (same for all local rows). 1854 - o_nnz - array containing the number of nonzeros in the various block rows of the 1855 off-diagonal portion of the local submatrix (possibly different for 1856 each block row) or PETSC_NULL. 1857 1858 1859 Options Database Keys: 1860 . -mat_no_unroll - uses code that does not unroll the loops in the 1861 block calculations (much slower) 1862 . -mat_block_size - size of the blocks to use 1863 1864 Notes: 1865 1866 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1867 than it must be used on all processors that share the object for that argument. 1868 1869 If the *_nnz parameter is given then the *_nz parameter is ignored 1870 1871 Storage Information: 1872 For a square global matrix we define each processor's diagonal portion 1873 to be its local rows and the corresponding columns (a square submatrix); 1874 each processor's off-diagonal portion encompasses the remainder of the 1875 local matrix (a rectangular submatrix). 1876 1877 The user can specify preallocated storage for the diagonal part of 1878 the local submatrix with either d_nz or d_nnz (not both). Set 1879 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1880 memory allocation. Likewise, specify preallocated storage for the 1881 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1882 1883 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1884 the figure below we depict these three local rows and all columns (0-11). 1885 1886 .vb 1887 0 1 2 3 4 5 6 7 8 9 10 11 1888 ------------------- 1889 row 3 | o o o d d d o o o o o o 1890 row 4 | o o o d d d o o o o o o 1891 row 5 | o o o d d d o o o o o o 1892 ------------------- 1893 .ve 1894 1895 Thus, any entries in the d locations are stored in the d (diagonal) 1896 submatrix, and any entries in the o locations are stored in the 1897 o (off-diagonal) submatrix. Note that the d matrix is stored in 1898 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 1899 1900 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 1901 plus the diagonal part of the d matrix, 1902 and o_nz should indicate the number of block nonzeros per row in the o matrix. 1903 In general, for PDE problems in which most nonzeros are near the diagonal, 1904 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1905 or you will get TERRIBLE performance; see the users' manual chapter on 1906 matrices. 1907 1908 Level: intermediate 1909 1910 .keywords: matrix, block, aij, compressed row, sparse, parallel 1911 1912 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1913 @*/ 1914 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 1915 { 1916 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 1917 1918 PetscFunctionBegin; 1919 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 1920 if (f) { 1921 ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 1922 } 1923 PetscFunctionReturn(0); 1924 } 1925 1926 #undef __FUNCT__ 1927 #define __FUNCT__ "MatCreateMPISBAIJ" 1928 /*@C 1929 MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format 1930 (block compressed row). For good matrix assembly performance 1931 the user should preallocate the matrix storage by setting the parameters 1932 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1933 performance can be increased by more than a factor of 50. 1934 1935 Collective on MPI_Comm 1936 1937 Input Parameters: 1938 + comm - MPI communicator 1939 . bs - size of blockk 1940 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1941 This value should be the same as the local size used in creating the 1942 y vector for the matrix-vector product y = Ax. 1943 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1944 This value should be the same as the local size used in creating the 1945 x vector for the matrix-vector product y = Ax. 1946 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 1947 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 1948 . d_nz - number of block nonzeros per block row in diagonal portion of local 1949 submatrix (same for all local rows) 1950 . d_nnz - array containing the number of block nonzeros in the various block rows 1951 in the upper triangular portion of the in diagonal portion of the local 1952 (possibly different for each block block row) or PETSC_NULL. 1953 You must leave room for the diagonal entry even if it is zero. 1954 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1955 submatrix (same for all local rows). 1956 - o_nnz - array containing the number of nonzeros in the various block rows of the 1957 off-diagonal portion of the local submatrix (possibly different for 1958 each block row) or PETSC_NULL. 1959 1960 Output Parameter: 1961 . A - the matrix 1962 1963 Options Database Keys: 1964 . -mat_no_unroll - uses code that does not unroll the loops in the 1965 block calculations (much slower) 1966 . -mat_block_size - size of the blocks to use 1967 . -mat_mpi - use the parallel matrix data structures even on one processor 1968 (defaults to using SeqBAIJ format on one processor) 1969 1970 Notes: 1971 The number of rows and columns must be divisible by blocksize. 1972 1973 The user MUST specify either the local or global matrix dimensions 1974 (possibly both). 1975 1976 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1977 than it must be used on all processors that share the object for that argument. 1978 1979 If the *_nnz parameter is given then the *_nz parameter is ignored 1980 1981 Storage Information: 1982 For a square global matrix we define each processor's diagonal portion 1983 to be its local rows and the corresponding columns (a square submatrix); 1984 each processor's off-diagonal portion encompasses the remainder of the 1985 local matrix (a rectangular submatrix). 1986 1987 The user can specify preallocated storage for the diagonal part of 1988 the local submatrix with either d_nz or d_nnz (not both). Set 1989 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1990 memory allocation. Likewise, specify preallocated storage for the 1991 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1992 1993 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1994 the figure below we depict these three local rows and all columns (0-11). 1995 1996 .vb 1997 0 1 2 3 4 5 6 7 8 9 10 11 1998 ------------------- 1999 row 3 | o o o d d d o o o o o o 2000 row 4 | o o o d d d o o o o o o 2001 row 5 | o o o d d d o o o o o o 2002 ------------------- 2003 .ve 2004 2005 Thus, any entries in the d locations are stored in the d (diagonal) 2006 submatrix, and any entries in the o locations are stored in the 2007 o (off-diagonal) submatrix. Note that the d matrix is stored in 2008 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 2009 2010 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 2011 plus the diagonal part of the d matrix, 2012 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2013 In general, for PDE problems in which most nonzeros are near the diagonal, 2014 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2015 or you will get TERRIBLE performance; see the users' manual chapter on 2016 matrices. 2017 2018 Level: intermediate 2019 2020 .keywords: matrix, block, aij, compressed row, sparse, parallel 2021 2022 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2023 @*/ 2024 2025 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPISBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2026 { 2027 PetscErrorCode ierr; 2028 PetscMPIInt size; 2029 2030 PetscFunctionBegin; 2031 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2032 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2033 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2034 if (size > 1) { 2035 ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr); 2036 ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2037 } else { 2038 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 2039 ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2040 } 2041 PetscFunctionReturn(0); 2042 } 2043 2044 2045 #undef __FUNCT__ 2046 #define __FUNCT__ "MatDuplicate_MPISBAIJ" 2047 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2048 { 2049 Mat mat; 2050 Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data; 2051 PetscErrorCode ierr; 2052 PetscInt len=0,nt,bs=matin->bs,mbs=oldmat->mbs; 2053 PetscScalar *array; 2054 2055 PetscFunctionBegin; 2056 *newmat = 0; 2057 ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr); 2058 ierr = MatSetSizes(mat,matin->m,matin->n,matin->M,matin->N);CHKERRQ(ierr); 2059 ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 2060 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2061 2062 mat->factor = matin->factor; 2063 mat->preallocated = PETSC_TRUE; 2064 mat->assembled = PETSC_TRUE; 2065 mat->insertmode = NOT_SET_VALUES; 2066 2067 a = (Mat_MPISBAIJ*)mat->data; 2068 mat->bs = matin->bs; 2069 a->bs2 = oldmat->bs2; 2070 a->mbs = oldmat->mbs; 2071 a->nbs = oldmat->nbs; 2072 a->Mbs = oldmat->Mbs; 2073 a->Nbs = oldmat->Nbs; 2074 2075 a->rstart = oldmat->rstart; 2076 a->rend = oldmat->rend; 2077 a->cstart = oldmat->cstart; 2078 a->cend = oldmat->cend; 2079 a->size = oldmat->size; 2080 a->rank = oldmat->rank; 2081 a->donotstash = oldmat->donotstash; 2082 a->roworiented = oldmat->roworiented; 2083 a->rowindices = 0; 2084 a->rowvalues = 0; 2085 a->getrowactive = PETSC_FALSE; 2086 a->barray = 0; 2087 a->rstart_bs = oldmat->rstart_bs; 2088 a->rend_bs = oldmat->rend_bs; 2089 a->cstart_bs = oldmat->cstart_bs; 2090 a->cend_bs = oldmat->cend_bs; 2091 2092 /* hash table stuff */ 2093 a->ht = 0; 2094 a->hd = 0; 2095 a->ht_size = 0; 2096 a->ht_flag = oldmat->ht_flag; 2097 a->ht_fact = oldmat->ht_fact; 2098 a->ht_total_ct = 0; 2099 a->ht_insert_ct = 0; 2100 2101 ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr); 2102 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 2103 ierr = MatStashCreate_Private(matin->comm,matin->bs,&mat->bstash);CHKERRQ(ierr); 2104 if (oldmat->colmap) { 2105 #if defined (PETSC_USE_CTABLE) 2106 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2107 #else 2108 ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2109 ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2110 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2111 #endif 2112 } else a->colmap = 0; 2113 2114 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2115 ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2116 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2117 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 2118 } else a->garray = 0; 2119 2120 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2121 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2122 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2123 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2124 2125 ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr); 2126 ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr); 2127 ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr); 2128 ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr); 2129 2130 ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr); 2131 ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr); 2132 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr); 2133 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr); 2134 ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr); 2135 ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr); 2136 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr); 2137 ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr); 2138 ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr); 2139 ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr); 2140 ierr = PetscLogObjectParent(mat,a->slvec0b);CHKERRQ(ierr); 2141 ierr = PetscLogObjectParent(mat,a->slvec1a);CHKERRQ(ierr); 2142 ierr = PetscLogObjectParent(mat,a->slvec1b);CHKERRQ(ierr); 2143 2144 /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */ 2145 ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr); 2146 a->sMvctx = oldmat->sMvctx; 2147 ierr = PetscLogObjectParent(mat,a->sMvctx);CHKERRQ(ierr); 2148 2149 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2150 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2151 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2152 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2153 ierr = PetscFListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr); 2154 *newmat = mat; 2155 PetscFunctionReturn(0); 2156 } 2157 2158 #include "petscsys.h" 2159 2160 #undef __FUNCT__ 2161 #define __FUNCT__ "MatLoad_MPISBAIJ" 2162 PetscErrorCode MatLoad_MPISBAIJ(PetscViewer viewer, MatType type,Mat *newmat) 2163 { 2164 Mat A; 2165 PetscErrorCode ierr; 2166 PetscInt i,nz,j,rstart,rend; 2167 PetscScalar *vals,*buf; 2168 MPI_Comm comm = ((PetscObject)viewer)->comm; 2169 MPI_Status status; 2170 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,*locrowlens; 2171 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2172 PetscInt *procsnz = 0,jj,*mycols,*ibuf; 2173 PetscInt bs=1,Mbs,mbs,extra_rows; 2174 PetscInt *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2175 PetscInt dcount,kmax,k,nzcount,tmp; 2176 int fd; 2177 2178 PetscFunctionBegin; 2179 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2180 2181 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2182 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2183 if (!rank) { 2184 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2185 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2186 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2187 if (header[3] < 0) { 2188 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ"); 2189 } 2190 } 2191 2192 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2193 M = header[1]; N = header[2]; 2194 2195 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2196 2197 /* 2198 This code adds extra rows to make sure the number of rows is 2199 divisible by the blocksize 2200 */ 2201 Mbs = M/bs; 2202 extra_rows = bs - M + bs*(Mbs); 2203 if (extra_rows == bs) extra_rows = 0; 2204 else Mbs++; 2205 if (extra_rows &&!rank) { 2206 ierr = PetscLogInfo((0,"MatLoad_MPISBAIJ:Padding loaded matrix to match blocksize\n"));CHKERRQ(ierr); 2207 } 2208 2209 /* determine ownership of all rows */ 2210 mbs = Mbs/size + ((Mbs % size) > rank); 2211 m = mbs*bs; 2212 ierr = PetscMalloc(2*(size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr); 2213 browners = rowners + size + 1; 2214 ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2215 rowners[0] = 0; 2216 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2217 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2218 rstart = rowners[rank]; 2219 rend = rowners[rank+1]; 2220 2221 /* distribute row lengths to all processors */ 2222 ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscMPIInt),&locrowlens);CHKERRQ(ierr); 2223 if (!rank) { 2224 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2225 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2226 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2227 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr); 2228 for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2229 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2230 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2231 } else { 2232 ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2233 } 2234 2235 if (!rank) { /* procs[0] */ 2236 /* calculate the number of nonzeros on each processor */ 2237 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2238 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2239 for (i=0; i<size; i++) { 2240 for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 2241 procsnz[i] += rowlengths[j]; 2242 } 2243 } 2244 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2245 2246 /* determine max buffer needed and allocate it */ 2247 maxnz = 0; 2248 for (i=0; i<size; i++) { 2249 maxnz = PetscMax(maxnz,procsnz[i]); 2250 } 2251 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2252 2253 /* read in my part of the matrix column indices */ 2254 nz = procsnz[0]; 2255 ierr = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 2256 mycols = ibuf; 2257 if (size == 1) nz -= extra_rows; 2258 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2259 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2260 2261 /* read in every ones (except the last) and ship off */ 2262 for (i=1; i<size-1; i++) { 2263 nz = procsnz[i]; 2264 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2265 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2266 } 2267 /* read in the stuff for the last proc */ 2268 if (size != 1) { 2269 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2270 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2271 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2272 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 2273 } 2274 ierr = PetscFree(cols);CHKERRQ(ierr); 2275 } else { /* procs[i], i>0 */ 2276 /* determine buffer space needed for message */ 2277 nz = 0; 2278 for (i=0; i<m; i++) { 2279 nz += locrowlens[i]; 2280 } 2281 ierr = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 2282 mycols = ibuf; 2283 /* receive message of column indices*/ 2284 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2285 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2286 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2287 } 2288 2289 /* loop over local rows, determining number of off diagonal entries */ 2290 ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 2291 odlens = dlens + (rend-rstart); 2292 ierr = PetscMalloc(3*Mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr); 2293 ierr = PetscMemzero(mask,3*Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2294 masked1 = mask + Mbs; 2295 masked2 = masked1 + Mbs; 2296 rowcount = 0; nzcount = 0; 2297 for (i=0; i<mbs; i++) { 2298 dcount = 0; 2299 odcount = 0; 2300 for (j=0; j<bs; j++) { 2301 kmax = locrowlens[rowcount]; 2302 for (k=0; k<kmax; k++) { 2303 tmp = mycols[nzcount++]/bs; /* block col. index */ 2304 if (!mask[tmp]) { 2305 mask[tmp] = 1; 2306 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */ 2307 else masked1[dcount++] = tmp; /* entry in diag portion */ 2308 } 2309 } 2310 rowcount++; 2311 } 2312 2313 dlens[i] = dcount; /* d_nzz[i] */ 2314 odlens[i] = odcount; /* o_nzz[i] */ 2315 2316 /* zero out the mask elements we set */ 2317 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2318 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2319 } 2320 2321 /* create our matrix */ 2322 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 2323 ierr = MatSetSizes(A,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2324 ierr = MatSetType(A,type);CHKERRQ(ierr); 2325 ierr = MatMPISBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr); 2326 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2327 2328 if (!rank) { 2329 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2330 /* read in my part of the matrix numerical values */ 2331 nz = procsnz[0]; 2332 vals = buf; 2333 mycols = ibuf; 2334 if (size == 1) nz -= extra_rows; 2335 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2336 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2337 2338 /* insert into matrix */ 2339 jj = rstart*bs; 2340 for (i=0; i<m; i++) { 2341 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2342 mycols += locrowlens[i]; 2343 vals += locrowlens[i]; 2344 jj++; 2345 } 2346 2347 /* read in other processors (except the last one) and ship out */ 2348 for (i=1; i<size-1; i++) { 2349 nz = procsnz[i]; 2350 vals = buf; 2351 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2352 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2353 } 2354 /* the last proc */ 2355 if (size != 1){ 2356 nz = procsnz[i] - extra_rows; 2357 vals = buf; 2358 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2359 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2360 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 2361 } 2362 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2363 2364 } else { 2365 /* receive numeric values */ 2366 ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2367 2368 /* receive message of values*/ 2369 vals = buf; 2370 mycols = ibuf; 2371 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2372 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2373 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2374 2375 /* insert into matrix */ 2376 jj = rstart*bs; 2377 for (i=0; i<m; i++) { 2378 ierr = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2379 mycols += locrowlens[i]; 2380 vals += locrowlens[i]; 2381 jj++; 2382 } 2383 } 2384 2385 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2386 ierr = PetscFree(buf);CHKERRQ(ierr); 2387 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2388 ierr = PetscFree(rowners);CHKERRQ(ierr); 2389 ierr = PetscFree(dlens);CHKERRQ(ierr); 2390 ierr = PetscFree(mask);CHKERRQ(ierr); 2391 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2392 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2393 *newmat = A; 2394 PetscFunctionReturn(0); 2395 } 2396 2397 #undef __FUNCT__ 2398 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor" 2399 /*XXXXX@ 2400 MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2401 2402 Input Parameters: 2403 . mat - the matrix 2404 . fact - factor 2405 2406 Collective on Mat 2407 2408 Level: advanced 2409 2410 Notes: 2411 This can also be set by the command line option: -mat_use_hash_table fact 2412 2413 .keywords: matrix, hashtable, factor, HT 2414 2415 .seealso: MatSetOption() 2416 @XXXXX*/ 2417 2418 2419 #undef __FUNCT__ 2420 #define __FUNCT__ "MatGetRowMax_MPISBAIJ" 2421 PetscErrorCode MatGetRowMax_MPISBAIJ(Mat A,Vec v) 2422 { 2423 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 2424 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(a->B)->data; 2425 PetscReal atmp; 2426 PetscReal *work,*svalues,*rvalues; 2427 PetscErrorCode ierr; 2428 PetscInt i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol; 2429 PetscMPIInt rank,size; 2430 PetscInt *rowners_bs,dest,count,source; 2431 PetscScalar *va; 2432 MatScalar *ba; 2433 MPI_Status stat; 2434 2435 PetscFunctionBegin; 2436 ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr); 2437 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2438 2439 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 2440 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 2441 2442 bs = A->bs; 2443 mbs = a->mbs; 2444 Mbs = a->Mbs; 2445 ba = b->a; 2446 bi = b->i; 2447 bj = b->j; 2448 2449 /* find ownerships */ 2450 rowners_bs = a->rowners_bs; 2451 2452 /* each proc creates an array to be distributed */ 2453 ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr); 2454 ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr); 2455 2456 /* row_max for B */ 2457 if (rank != size-1){ 2458 for (i=0; i<mbs; i++) { 2459 ncols = bi[1] - bi[0]; bi++; 2460 brow = bs*i; 2461 for (j=0; j<ncols; j++){ 2462 bcol = bs*(*bj); 2463 for (kcol=0; kcol<bs; kcol++){ 2464 col = bcol + kcol; /* local col index */ 2465 col += rowners_bs[rank+1]; /* global col index */ 2466 for (krow=0; krow<bs; krow++){ 2467 atmp = PetscAbsScalar(*ba); ba++; 2468 row = brow + krow; /* local row index */ 2469 if (PetscRealPart(va[row]) < atmp) va[row] = atmp; 2470 if (work[col] < atmp) work[col] = atmp; 2471 } 2472 } 2473 bj++; 2474 } 2475 } 2476 2477 /* send values to its owners */ 2478 for (dest=rank+1; dest<size; dest++){ 2479 svalues = work + rowners_bs[dest]; 2480 count = rowners_bs[dest+1]-rowners_bs[dest]; 2481 ierr = MPI_Send(svalues,count,MPIU_REAL,dest,rank,A->comm);CHKERRQ(ierr); 2482 } 2483 } 2484 2485 /* receive values */ 2486 if (rank){ 2487 rvalues = work; 2488 count = rowners_bs[rank+1]-rowners_bs[rank]; 2489 for (source=0; source<rank; source++){ 2490 ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,A->comm,&stat);CHKERRQ(ierr); 2491 /* process values */ 2492 for (i=0; i<count; i++){ 2493 if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i]; 2494 } 2495 } 2496 } 2497 2498 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2499 ierr = PetscFree(work);CHKERRQ(ierr); 2500 PetscFunctionReturn(0); 2501 } 2502 2503 #undef __FUNCT__ 2504 #define __FUNCT__ "MatRelax_MPISBAIJ" 2505 PetscErrorCode MatRelax_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2506 { 2507 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2508 PetscErrorCode ierr; 2509 PetscInt mbs=mat->mbs,bs=matin->bs; 2510 PetscScalar *x,*b,*ptr,zero=0.0; 2511 Vec bb1; 2512 2513 PetscFunctionBegin; 2514 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 2515 if (bs > 1) 2516 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2517 2518 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2519 if ( flag & SOR_ZERO_INITIAL_GUESS ) { 2520 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2521 its--; 2522 } 2523 2524 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2525 while (its--){ 2526 2527 /* lower triangular part: slvec0b = - B^T*xx */ 2528 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2529 2530 /* copy xx into slvec0a */ 2531 ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2532 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2533 ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2534 ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2535 2536 ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr); 2537 2538 /* copy bb into slvec1a */ 2539 ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2540 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 2541 ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2542 ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2543 2544 /* set slvec1b = 0 */ 2545 ierr = VecSet(mat->slvec1b,zero);CHKERRQ(ierr); 2546 2547 ierr = VecScatterBegin(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr); 2548 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2549 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 2550 ierr = VecScatterEnd(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr); 2551 2552 /* upper triangular part: bb1 = bb1 - B*x */ 2553 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr); 2554 2555 /* local diagonal sweep */ 2556 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2557 } 2558 ierr = VecDestroy(bb1);CHKERRQ(ierr); 2559 } else { 2560 SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2561 } 2562 PetscFunctionReturn(0); 2563 } 2564 2565 #undef __FUNCT__ 2566 #define __FUNCT__ "MatRelax_MPISBAIJ_2comm" 2567 PetscErrorCode MatRelax_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2568 { 2569 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2570 PetscErrorCode ierr; 2571 Vec lvec1,bb1; 2572 2573 PetscFunctionBegin; 2574 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 2575 if (matin->bs > 1) 2576 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2577 2578 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2579 if ( flag & SOR_ZERO_INITIAL_GUESS ) { 2580 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2581 its--; 2582 } 2583 2584 ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr); 2585 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2586 while (its--){ 2587 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 2588 2589 /* lower diagonal part: bb1 = bb - B^T*xx */ 2590 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr); 2591 ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr); 2592 2593 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 2594 ierr = VecCopy(bb,bb1);CHKERRQ(ierr); 2595 ierr = VecScatterBegin(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr); 2596 2597 /* upper diagonal part: bb1 = bb1 - B*x */ 2598 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2599 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 2600 2601 ierr = VecScatterEnd(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr); 2602 2603 /* diagonal sweep */ 2604 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2605 } 2606 ierr = VecDestroy(lvec1);CHKERRQ(ierr); 2607 ierr = VecDestroy(bb1);CHKERRQ(ierr); 2608 } else { 2609 SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2610 } 2611 PetscFunctionReturn(0); 2612 } 2613 2614