1 #define PETSCMAT_DLL 2 3 #include "src/mat/impls/baij/mpi/mpibaij.h" /*I "petscmat.h" I*/ 4 #include "mpisbaij.h" 5 #include "src/mat/impls/sbaij/seq/sbaij.h" 6 7 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat); 8 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat); 9 EXTERN PetscErrorCode DisAssemble_MPISBAIJ(Mat); 10 EXTERN PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt); 11 EXTERN PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []); 12 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []); 13 EXTERN PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode); 14 EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 15 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 16 EXTERN PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**); 17 EXTERN PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**); 18 EXTERN PetscErrorCode MatPrintHelp_SeqSBAIJ(Mat); 19 EXTERN PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*); 20 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *); 21 EXTERN PetscErrorCode MatGetRowMax_MPISBAIJ(Mat,Vec); 22 EXTERN PetscErrorCode MatRelax_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec); 23 24 /* UGLY, ugly, ugly 25 When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does 26 not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and 27 inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ() 28 converts the entries into single precision and then calls ..._MatScalar() to put them 29 into the single precision data structures. 30 */ 31 #if defined(PETSC_USE_MAT_SINGLE) 32 EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode); 33 EXTERN PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode); 34 EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode); 35 EXTERN PetscErrorCode MatSetValues_MPISBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode); 36 EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode); 37 #else 38 #define MatSetValuesBlocked_SeqSBAIJ_MatScalar MatSetValuesBlocked_SeqSBAIJ 39 #define MatSetValues_MPISBAIJ_MatScalar MatSetValues_MPISBAIJ 40 #define MatSetValuesBlocked_MPISBAIJ_MatScalar MatSetValuesBlocked_MPISBAIJ 41 #define MatSetValues_MPISBAIJ_HT_MatScalar MatSetValues_MPISBAIJ_HT 42 #define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar MatSetValuesBlocked_MPISBAIJ_HT 43 #endif 44 45 EXTERN_C_BEGIN 46 #undef __FUNCT__ 47 #define __FUNCT__ "MatStoreValues_MPISBAIJ" 48 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPISBAIJ(Mat mat) 49 { 50 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 51 PetscErrorCode ierr; 52 53 PetscFunctionBegin; 54 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 55 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 56 PetscFunctionReturn(0); 57 } 58 EXTERN_C_END 59 60 EXTERN_C_BEGIN 61 #undef __FUNCT__ 62 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ" 63 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPISBAIJ(Mat mat) 64 { 65 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 66 PetscErrorCode ierr; 67 68 PetscFunctionBegin; 69 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 70 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 71 PetscFunctionReturn(0); 72 } 73 EXTERN_C_END 74 75 76 #define CHUNKSIZE 10 77 78 #define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \ 79 { \ 80 \ 81 brow = row/bs; \ 82 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 83 rmax = aimax[brow]; nrow = ailen[brow]; \ 84 bcol = col/bs; \ 85 ridx = row % bs; cidx = col % bs; \ 86 low = 0; high = nrow; \ 87 while (high-low > 3) { \ 88 t = (low+high)/2; \ 89 if (rp[t] > bcol) high = t; \ 90 else low = t; \ 91 } \ 92 for (_i=low; _i<high; _i++) { \ 93 if (rp[_i] > bcol) break; \ 94 if (rp[_i] == bcol) { \ 95 bap = ap + bs2*_i + bs*cidx + ridx; \ 96 if (addv == ADD_VALUES) *bap += value; \ 97 else *bap = value; \ 98 goto a_noinsert; \ 99 } \ 100 } \ 101 if (a->nonew == 1) goto a_noinsert; \ 102 if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 103 MatSeqXAIJReallocateAIJ(a,bs2,nrow,brow,bcol,rmax,aa,ai,aj,a->mbs,rp,ap,aimax,a->nonew); \ 104 N = nrow++ - 1; \ 105 /* shift up all the later entries in this row */ \ 106 for (ii=N; ii>=_i; ii--) { \ 107 rp[ii+1] = rp[ii]; \ 108 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 109 } \ 110 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 111 rp[_i] = bcol; \ 112 ap[bs2*_i + bs*cidx + ridx] = value; \ 113 a_noinsert:; \ 114 ailen[brow] = nrow; \ 115 } 116 #ifndef MatSetValues_SeqBAIJ_B_Private 117 #define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \ 118 { \ 119 brow = row/bs; \ 120 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 121 rmax = bimax[brow]; nrow = bilen[brow]; \ 122 bcol = col/bs; \ 123 ridx = row % bs; cidx = col % bs; \ 124 low = 0; high = nrow; \ 125 while (high-low > 3) { \ 126 t = (low+high)/2; \ 127 if (rp[t] > bcol) high = t; \ 128 else low = t; \ 129 } \ 130 for (_i=low; _i<high; _i++) { \ 131 if (rp[_i] > bcol) break; \ 132 if (rp[_i] == bcol) { \ 133 bap = ap + bs2*_i + bs*cidx + ridx; \ 134 if (addv == ADD_VALUES) *bap += value; \ 135 else *bap = value; \ 136 goto b_noinsert; \ 137 } \ 138 } \ 139 if (b->nonew == 1) goto b_noinsert; \ 140 if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 141 MatSeqXAIJReallocateAIJ(b,bs2,nrow,brow,bcol,rmax,ba,bi,bj,b->mbs,rp,ap,bimax,b->nonew); \ 142 N = nrow++ - 1; \ 143 /* shift up all the later entries in this row */ \ 144 for (ii=N; ii>=_i; ii--) { \ 145 rp[ii+1] = rp[ii]; \ 146 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 147 } \ 148 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 149 rp[_i] = bcol; \ 150 ap[bs2*_i + bs*cidx + ridx] = value; \ 151 b_noinsert:; \ 152 bilen[brow] = nrow; \ 153 } 154 #endif 155 156 #if defined(PETSC_USE_MAT_SINGLE) 157 #undef __FUNCT__ 158 #define __FUNCT__ "MatSetValues_MPISBAIJ" 159 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 160 { 161 Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data; 162 PetscErrorCode ierr; 163 PetscInt i,N = m*n; 164 MatScalar *vsingle; 165 166 PetscFunctionBegin; 167 if (N > b->setvalueslen) { 168 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 169 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 170 b->setvalueslen = N; 171 } 172 vsingle = b->setvaluescopy; 173 174 for (i=0; i<N; i++) { 175 vsingle[i] = v[i]; 176 } 177 ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 178 PetscFunctionReturn(0); 179 } 180 181 #undef __FUNCT__ 182 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ" 183 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 184 { 185 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 186 PetscErrorCode ierr; 187 PetscInt i,N = m*n*b->bs2; 188 MatScalar *vsingle; 189 190 PetscFunctionBegin; 191 if (N > b->setvalueslen) { 192 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 193 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 194 b->setvalueslen = N; 195 } 196 vsingle = b->setvaluescopy; 197 for (i=0; i<N; i++) { 198 vsingle[i] = v[i]; 199 } 200 ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 201 PetscFunctionReturn(0); 202 } 203 #endif 204 205 /* Only add/insert a(i,j) with i<=j (blocks). 206 Any a(i,j) with i>j input by user is ingored. 207 */ 208 #undef __FUNCT__ 209 #define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar" 210 PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 211 { 212 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 213 MatScalar value; 214 PetscTruth roworiented = baij->roworiented; 215 PetscErrorCode ierr; 216 PetscInt i,j,row,col; 217 PetscInt rstart_orig=mat->rmap.rstart; 218 PetscInt rend_orig=mat->rmap.rend,cstart_orig=mat->cmap.rstart; 219 PetscInt cend_orig=mat->cmap.rend,bs=mat->rmap.bs; 220 221 /* Some Variables required in the macro */ 222 Mat A = baij->A; 223 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data; 224 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 225 MatScalar *aa=a->a; 226 227 Mat B = baij->B; 228 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 229 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 230 MatScalar *ba=b->a; 231 232 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 233 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 234 MatScalar *ap,*bap; 235 236 /* for stash */ 237 PetscInt n_loc, *in_loc=0; 238 MatScalar *v_loc=0; 239 240 PetscFunctionBegin; 241 242 if(!baij->donotstash){ 243 ierr = PetscMalloc(n*sizeof(PetscInt),&in_loc);CHKERRQ(ierr); 244 ierr = PetscMalloc(n*sizeof(MatScalar),&v_loc);CHKERRQ(ierr); 245 } 246 247 for (i=0; i<m; i++) { 248 if (im[i] < 0) continue; 249 #if defined(PETSC_USE_DEBUG) 250 if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1); 251 #endif 252 if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */ 253 row = im[i] - rstart_orig; /* local row index */ 254 for (j=0; j<n; j++) { 255 if (im[i]/bs > in[j]/bs){ 256 if (a->ignore_ltriangular){ 257 continue; /* ignore lower triangular blocks */ 258 } else { 259 SETERRQ(PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR)"); 260 } 261 } 262 if (in[j] >= cstart_orig && in[j] < cend_orig){ /* diag entry (A) */ 263 col = in[j] - cstart_orig; /* local col index */ 264 brow = row/bs; bcol = col/bs; 265 if (brow > bcol) continue; /* ignore lower triangular blocks of A */ 266 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 267 MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv); 268 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 269 } else if (in[j] < 0) continue; 270 #if defined(PETSC_USE_DEBUG) 271 else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);} 272 #endif 273 else { /* off-diag entry (B) */ 274 if (mat->was_assembled) { 275 if (!baij->colmap) { 276 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 277 } 278 #if defined (PETSC_USE_CTABLE) 279 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 280 col = col - 1; 281 #else 282 col = baij->colmap[in[j]/bs] - 1; 283 #endif 284 if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) { 285 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 286 col = in[j]; 287 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 288 B = baij->B; 289 b = (Mat_SeqBAIJ*)(B)->data; 290 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 291 ba=b->a; 292 } else col += in[j]%bs; 293 } else col = in[j]; 294 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 295 MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv); 296 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 297 } 298 } 299 } else { /* off processor entry */ 300 if (!baij->donotstash) { 301 n_loc = 0; 302 for (j=0; j<n; j++){ 303 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 304 in_loc[n_loc] = in[j]; 305 if (roworiented) { 306 v_loc[n_loc] = v[i*n+j]; 307 } else { 308 v_loc[n_loc] = v[j*m+i]; 309 } 310 n_loc++; 311 } 312 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr); 313 } 314 } 315 } 316 317 if(!baij->donotstash){ 318 ierr = PetscFree(in_loc);CHKERRQ(ierr); 319 ierr = PetscFree(v_loc);CHKERRQ(ierr); 320 } 321 PetscFunctionReturn(0); 322 } 323 324 #undef __FUNCT__ 325 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_MatScalar" 326 PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 327 { 328 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 329 const MatScalar *value; 330 MatScalar *barray=baij->barray; 331 PetscTruth roworiented = baij->roworiented; 332 PetscErrorCode ierr; 333 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 334 PetscInt rend=baij->rendbs,cstart=baij->rstartbs,stepval; 335 PetscInt cend=baij->rendbs,bs=mat->rmap.bs,bs2=baij->bs2; 336 337 PetscFunctionBegin; 338 if(!barray) { 339 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 340 baij->barray = barray; 341 } 342 343 if (roworiented) { 344 stepval = (n-1)*bs; 345 } else { 346 stepval = (m-1)*bs; 347 } 348 for (i=0; i<m; i++) { 349 if (im[i] < 0) continue; 350 #if defined(PETSC_USE_DEBUG) 351 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 352 #endif 353 if (im[i] >= rstart && im[i] < rend) { 354 row = im[i] - rstart; 355 for (j=0; j<n; j++) { 356 /* If NumCol = 1 then a copy is not required */ 357 if ((roworiented) && (n == 1)) { 358 barray = (MatScalar*) v + i*bs2; 359 } else if((!roworiented) && (m == 1)) { 360 barray = (MatScalar*) v + j*bs2; 361 } else { /* Here a copy is required */ 362 if (roworiented) { 363 value = v + i*(stepval+bs)*bs + j*bs; 364 } else { 365 value = v + j*(stepval+bs)*bs + i*bs; 366 } 367 for (ii=0; ii<bs; ii++,value+=stepval) { 368 for (jj=0; jj<bs; jj++) { 369 *barray++ = *value++; 370 } 371 } 372 barray -=bs2; 373 } 374 375 if (in[j] >= cstart && in[j] < cend){ 376 col = in[j] - cstart; 377 ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 378 } 379 else if (in[j] < 0) continue; 380 #if defined(PETSC_USE_DEBUG) 381 else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);} 382 #endif 383 else { 384 if (mat->was_assembled) { 385 if (!baij->colmap) { 386 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 387 } 388 389 #if defined(PETSC_USE_DEBUG) 390 #if defined (PETSC_USE_CTABLE) 391 { PetscInt data; 392 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 393 if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 394 } 395 #else 396 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 397 #endif 398 #endif 399 #if defined (PETSC_USE_CTABLE) 400 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 401 col = (col - 1)/bs; 402 #else 403 col = (baij->colmap[in[j]] - 1)/bs; 404 #endif 405 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 406 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 407 col = in[j]; 408 } 409 } 410 else col = in[j]; 411 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 412 } 413 } 414 } else { 415 if (!baij->donotstash) { 416 if (roworiented) { 417 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 418 } else { 419 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 420 } 421 } 422 } 423 } 424 PetscFunctionReturn(0); 425 } 426 427 #undef __FUNCT__ 428 #define __FUNCT__ "MatGetValues_MPISBAIJ" 429 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 430 { 431 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 432 PetscErrorCode ierr; 433 PetscInt bs=mat->rmap.bs,i,j,bsrstart = mat->rmap.rstart,bsrend = mat->rmap.rend; 434 PetscInt bscstart = mat->cmap.rstart,bscend = mat->cmap.rend,row,col,data; 435 436 PetscFunctionBegin; 437 for (i=0; i<m; i++) { 438 if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); 439 if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1); 440 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 441 row = idxm[i] - bsrstart; 442 for (j=0; j<n; j++) { 443 if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); 444 if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1); 445 if (idxn[j] >= bscstart && idxn[j] < bscend){ 446 col = idxn[j] - bscstart; 447 ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 448 } else { 449 if (!baij->colmap) { 450 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 451 } 452 #if defined (PETSC_USE_CTABLE) 453 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 454 data --; 455 #else 456 data = baij->colmap[idxn[j]/bs]-1; 457 #endif 458 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 459 else { 460 col = data + idxn[j]%bs; 461 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 462 } 463 } 464 } 465 } else { 466 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 467 } 468 } 469 PetscFunctionReturn(0); 470 } 471 472 #undef __FUNCT__ 473 #define __FUNCT__ "MatNorm_MPISBAIJ" 474 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm) 475 { 476 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 477 PetscErrorCode ierr; 478 PetscReal sum[2],*lnorm2; 479 480 PetscFunctionBegin; 481 if (baij->size == 1) { 482 ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr); 483 } else { 484 if (type == NORM_FROBENIUS) { 485 ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr); 486 ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr); 487 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++; /* squar power of norm(A) */ 488 ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr); 489 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--; /* squar power of norm(B) */ 490 ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 491 *norm = sqrt(sum[0] + 2*sum[1]); 492 ierr = PetscFree(lnorm2);CHKERRQ(ierr); 493 } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */ 494 Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data; 495 Mat_SeqBAIJ *bmat=(Mat_SeqBAIJ*)baij->B->data; 496 PetscReal *rsum,*rsum2,vabs; 497 PetscInt *jj,*garray=baij->garray,rstart=baij->rstartbs,nz; 498 PetscInt brow,bcol,col,bs=baij->A->rmap.bs,row,grow,gcol,mbs=amat->mbs; 499 MatScalar *v; 500 501 ierr = PetscMalloc((2*mat->cmap.N+1)*sizeof(PetscReal),&rsum);CHKERRQ(ierr); 502 rsum2 = rsum + mat->cmap.N; 503 ierr = PetscMemzero(rsum,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr); 504 /* Amat */ 505 v = amat->a; jj = amat->j; 506 for (brow=0; brow<mbs; brow++) { 507 grow = bs*(rstart + brow); 508 nz = amat->i[brow+1] - amat->i[brow]; 509 for (bcol=0; bcol<nz; bcol++){ 510 gcol = bs*(rstart + *jj); jj++; 511 for (col=0; col<bs; col++){ 512 for (row=0; row<bs; row++){ 513 vabs = PetscAbsScalar(*v); v++; 514 rsum[gcol+col] += vabs; 515 /* non-diagonal block */ 516 if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs; 517 } 518 } 519 } 520 } 521 /* Bmat */ 522 v = bmat->a; jj = bmat->j; 523 for (brow=0; brow<mbs; brow++) { 524 grow = bs*(rstart + brow); 525 nz = bmat->i[brow+1] - bmat->i[brow]; 526 for (bcol=0; bcol<nz; bcol++){ 527 gcol = bs*garray[*jj]; jj++; 528 for (col=0; col<bs; col++){ 529 for (row=0; row<bs; row++){ 530 vabs = PetscAbsScalar(*v); v++; 531 rsum[gcol+col] += vabs; 532 rsum[grow+row] += vabs; 533 } 534 } 535 } 536 } 537 ierr = MPI_Allreduce(rsum,rsum2,mat->cmap.N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 538 *norm = 0.0; 539 for (col=0; col<mat->cmap.N; col++) { 540 if (rsum2[col] > *norm) *norm = rsum2[col]; 541 } 542 ierr = PetscFree(rsum);CHKERRQ(ierr); 543 } else { 544 SETERRQ(PETSC_ERR_SUP,"No support for this norm yet"); 545 } 546 } 547 PetscFunctionReturn(0); 548 } 549 550 #undef __FUNCT__ 551 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ" 552 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode) 553 { 554 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 555 PetscErrorCode ierr; 556 PetscInt nstash,reallocs; 557 InsertMode addv; 558 559 PetscFunctionBegin; 560 if (baij->donotstash) { 561 PetscFunctionReturn(0); 562 } 563 564 /* make sure all processors are either in INSERTMODE or ADDMODE */ 565 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 566 if (addv == (ADD_VALUES|INSERT_VALUES)) { 567 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 568 } 569 mat->insertmode = addv; /* in case this processor had no cache */ 570 571 ierr = MatStashScatterBegin_Private(&mat->stash,mat->rmap.range);CHKERRQ(ierr); 572 ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rangebs);CHKERRQ(ierr); 573 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 574 ierr = PetscInfo2(0,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 575 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 576 ierr = PetscInfo2(0,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 577 PetscFunctionReturn(0); 578 } 579 580 #undef __FUNCT__ 581 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ" 582 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode) 583 { 584 Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data; 585 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)baij->A->data; 586 Mat_SeqBAIJ *b=(Mat_SeqBAIJ*)baij->B->data; 587 PetscErrorCode ierr; 588 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 589 PetscInt *row,*col,other_disassembled; 590 PetscMPIInt n; 591 PetscTruth r1,r2,r3; 592 MatScalar *val; 593 InsertMode addv = mat->insertmode; 594 595 PetscFunctionBegin; 596 597 if (!baij->donotstash) { 598 while (1) { 599 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 600 if (!flg) break; 601 602 for (i=0; i<n;) { 603 /* Now identify the consecutive vals belonging to the same row */ 604 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 605 if (j < n) ncols = j-i; 606 else ncols = n-i; 607 /* Now assemble all these values with a single function call */ 608 ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 609 i = j; 610 } 611 } 612 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 613 /* Now process the block-stash. Since the values are stashed column-oriented, 614 set the roworiented flag to column oriented, and after MatSetValues() 615 restore the original flags */ 616 r1 = baij->roworiented; 617 r2 = a->roworiented; 618 r3 = b->roworiented; 619 baij->roworiented = PETSC_FALSE; 620 a->roworiented = PETSC_FALSE; 621 b->roworiented = PETSC_FALSE; 622 while (1) { 623 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 624 if (!flg) break; 625 626 for (i=0; i<n;) { 627 /* Now identify the consecutive vals belonging to the same row */ 628 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 629 if (j < n) ncols = j-i; 630 else ncols = n-i; 631 ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 632 i = j; 633 } 634 } 635 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 636 baij->roworiented = r1; 637 a->roworiented = r2; 638 b->roworiented = r3; 639 } 640 641 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 642 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 643 644 /* determine if any processor has disassembled, if so we must 645 also disassemble ourselfs, in order that we may reassemble. */ 646 /* 647 if nonzero structure of submatrix B cannot change then we know that 648 no processor disassembled thus we can skip this stuff 649 */ 650 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 651 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 652 if (mat->was_assembled && !other_disassembled) { 653 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 654 } 655 } 656 657 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 658 ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */ 659 } 660 b->compressedrow.use = PETSC_TRUE; 661 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 662 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 663 664 if (baij->rowvalues) { 665 ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr); 666 baij->rowvalues = 0; 667 } 668 669 PetscFunctionReturn(0); 670 } 671 672 #undef __FUNCT__ 673 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket" 674 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 675 { 676 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 677 PetscErrorCode ierr; 678 PetscInt bs = mat->rmap.bs; 679 PetscMPIInt size = baij->size,rank = baij->rank; 680 PetscTruth iascii,isdraw; 681 PetscViewer sviewer; 682 PetscViewerFormat format; 683 684 PetscFunctionBegin; 685 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 686 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 687 if (iascii) { 688 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 689 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 690 MatInfo info; 691 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 692 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 693 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n", 694 rank,mat->rmap.N,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs, 695 mat->rmap.bs,(PetscInt)info.memory);CHKERRQ(ierr); 696 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 697 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr); 698 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 699 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr); 700 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 701 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 702 PetscFunctionReturn(0); 703 } else if (format == PETSC_VIEWER_ASCII_INFO) { 704 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 705 PetscFunctionReturn(0); 706 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 707 PetscFunctionReturn(0); 708 } 709 } 710 711 if (isdraw) { 712 PetscDraw draw; 713 PetscTruth isnull; 714 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 715 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 716 } 717 718 if (size == 1) { 719 ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr); 720 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 721 } else { 722 /* assemble the entire matrix onto first processor. */ 723 Mat A; 724 Mat_SeqSBAIJ *Aloc; 725 Mat_SeqBAIJ *Bloc; 726 PetscInt M = mat->rmap.N,N = mat->cmap.N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 727 MatScalar *a; 728 729 /* Should this be the same type as mat? */ 730 ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr); 731 if (!rank) { 732 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 733 } else { 734 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 735 } 736 ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 737 ierr = MatMPISBAIJSetPreallocation(A,mat->rmap.bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 738 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 739 740 /* copy over the A part */ 741 Aloc = (Mat_SeqSBAIJ*)baij->A->data; 742 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 743 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 744 745 for (i=0; i<mbs; i++) { 746 rvals[0] = mat->rmap.rstart + bs*i; 747 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 748 for (j=ai[i]; j<ai[i+1]; j++) { 749 col = mat->cmap.rstart+aj[j]*bs; 750 for (k=0; k<bs; k++) { 751 ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 752 col++; a += bs; 753 } 754 } 755 } 756 /* copy over the B part */ 757 Bloc = (Mat_SeqBAIJ*)baij->B->data; 758 ai = Bloc->i; aj = Bloc->j; a = Bloc->a; 759 for (i=0; i<mbs; i++) { 760 rvals[0] = mat->rmap.rstart + bs; 761 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 762 for (j=ai[i]; j<ai[i+1]; j++) { 763 col = baij->garray[aj[j]]*bs; 764 for (k=0; k<bs; k++) { 765 ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 766 col++; a += bs; 767 } 768 } 769 } 770 ierr = PetscFree(rvals);CHKERRQ(ierr); 771 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 772 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 773 /* 774 Everyone has to call to draw the matrix since the graphics waits are 775 synchronized across all processors that share the PetscDraw object 776 */ 777 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 778 if (!rank) { 779 ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 780 ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 781 } 782 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 783 ierr = MatDestroy(A);CHKERRQ(ierr); 784 } 785 PetscFunctionReturn(0); 786 } 787 788 #undef __FUNCT__ 789 #define __FUNCT__ "MatView_MPISBAIJ" 790 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer) 791 { 792 PetscErrorCode ierr; 793 PetscTruth iascii,isdraw,issocket,isbinary; 794 795 PetscFunctionBegin; 796 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 797 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 798 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 799 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 800 if (iascii || isdraw || issocket || isbinary) { 801 ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 802 } else { 803 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name); 804 } 805 PetscFunctionReturn(0); 806 } 807 808 #undef __FUNCT__ 809 #define __FUNCT__ "MatDestroy_MPISBAIJ" 810 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat) 811 { 812 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 813 PetscErrorCode ierr; 814 815 PetscFunctionBegin; 816 #if defined(PETSC_USE_LOG) 817 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap.N,mat->cmap.N); 818 #endif 819 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 820 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 821 ierr = MatDestroy(baij->A);CHKERRQ(ierr); 822 ierr = MatDestroy(baij->B);CHKERRQ(ierr); 823 #if defined (PETSC_USE_CTABLE) 824 if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);} 825 #else 826 if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);} 827 #endif 828 if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);} 829 if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 830 if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 831 if (baij->slvec0) { 832 ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr); 833 ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr); 834 } 835 if (baij->slvec1) { 836 ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr); 837 ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr); 838 ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr); 839 } 840 if (baij->sMvctx) {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);} 841 if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);} 842 if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);} 843 if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);} 844 #if defined(PETSC_USE_MAT_SINGLE) 845 if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);} 846 #endif 847 ierr = PetscFree(baij->rangebs);CHKERRQ(ierr); 848 ierr = PetscFree(baij);CHKERRQ(ierr); 849 850 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 851 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 852 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 853 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 854 PetscFunctionReturn(0); 855 } 856 857 #undef __FUNCT__ 858 #define __FUNCT__ "MatMult_MPISBAIJ" 859 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy) 860 { 861 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 862 PetscErrorCode ierr; 863 PetscInt nt,mbs=a->mbs,bs=A->rmap.bs; 864 PetscScalar *x,*from,zero=0.0; 865 866 PetscFunctionBegin; 867 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 868 if (nt != A->cmap.n) { 869 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 870 } 871 872 /* diagonal part */ 873 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 874 ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr); 875 876 /* subdiagonal part */ 877 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 878 CHKMEMQ; 879 /* copy x into the vec slvec0 */ 880 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 881 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 882 CHKMEMQ; 883 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 884 CHKMEMQ; 885 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 886 887 CHKMEMQ; 888 ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr); 889 CHKMEMQ; 890 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 891 CHKMEMQ; 892 ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr); 893 CHKMEMQ; 894 /* supperdiagonal part */ 895 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 896 CHKMEMQ; 897 PetscFunctionReturn(0); 898 } 899 900 #undef __FUNCT__ 901 #define __FUNCT__ "MatMult_MPISBAIJ_2comm" 902 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy) 903 { 904 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 905 PetscErrorCode ierr; 906 PetscInt nt; 907 908 PetscFunctionBegin; 909 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 910 if (nt != A->cmap.n) { 911 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 912 } 913 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 914 if (nt != A->rmap.N) { 915 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 916 } 917 918 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 919 /* do diagonal part */ 920 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 921 /* do supperdiagonal part */ 922 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 923 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 924 /* do subdiagonal part */ 925 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 926 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 927 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 928 929 PetscFunctionReturn(0); 930 } 931 932 #undef __FUNCT__ 933 #define __FUNCT__ "MatMultAdd_MPISBAIJ" 934 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 935 { 936 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 937 PetscErrorCode ierr; 938 PetscInt mbs=a->mbs,bs=A->rmap.bs; 939 PetscScalar *x,*from,zero=0.0; 940 941 PetscFunctionBegin; 942 /* 943 PetscSynchronizedPrintf(A->comm," MatMultAdd is called ...\n"); 944 PetscSynchronizedFlush(A->comm); 945 */ 946 /* diagonal part */ 947 ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr); 948 ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr); 949 950 /* subdiagonal part */ 951 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 952 953 /* copy x into the vec slvec0 */ 954 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 955 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 956 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 957 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 958 959 ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr); 960 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 961 ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr); 962 963 /* supperdiagonal part */ 964 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr); 965 966 PetscFunctionReturn(0); 967 } 968 969 #undef __FUNCT__ 970 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm" 971 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz) 972 { 973 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 974 PetscErrorCode ierr; 975 976 PetscFunctionBegin; 977 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 978 /* do diagonal part */ 979 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 980 /* do supperdiagonal part */ 981 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 982 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 983 984 /* do subdiagonal part */ 985 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 986 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 987 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 988 989 PetscFunctionReturn(0); 990 } 991 992 /* 993 This only works correctly for square matrices where the subblock A->A is the 994 diagonal block 995 */ 996 #undef __FUNCT__ 997 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ" 998 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v) 999 { 1000 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1001 PetscErrorCode ierr; 1002 1003 PetscFunctionBegin; 1004 /* if (a->rmap.N != a->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */ 1005 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1006 PetscFunctionReturn(0); 1007 } 1008 1009 #undef __FUNCT__ 1010 #define __FUNCT__ "MatScale_MPISBAIJ" 1011 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa) 1012 { 1013 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1014 PetscErrorCode ierr; 1015 1016 PetscFunctionBegin; 1017 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1018 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1019 PetscFunctionReturn(0); 1020 } 1021 1022 #undef __FUNCT__ 1023 #define __FUNCT__ "MatGetRow_MPISBAIJ" 1024 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1025 { 1026 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 1027 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1028 PetscErrorCode ierr; 1029 PetscInt bs = matin->rmap.bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1030 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap.rstart,brend = matin->rmap.rend; 1031 PetscInt *cmap,*idx_p,cstart = mat->rstartbs; 1032 1033 PetscFunctionBegin; 1034 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1035 mat->getrowactive = PETSC_TRUE; 1036 1037 if (!mat->rowvalues && (idx || v)) { 1038 /* 1039 allocate enough space to hold information from the longest row. 1040 */ 1041 Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data; 1042 Mat_SeqBAIJ *Ba = (Mat_SeqBAIJ*)mat->B->data; 1043 PetscInt max = 1,mbs = mat->mbs,tmp; 1044 for (i=0; i<mbs; i++) { 1045 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */ 1046 if (max < tmp) { max = tmp; } 1047 } 1048 ierr = PetscMalloc(max*bs2*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1049 mat->rowindices = (PetscInt*)(mat->rowvalues + max*bs2); 1050 } 1051 1052 if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows") 1053 lrow = row - brstart; /* local row index */ 1054 1055 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1056 if (!v) {pvA = 0; pvB = 0;} 1057 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1058 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1059 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1060 nztot = nzA + nzB; 1061 1062 cmap = mat->garray; 1063 if (v || idx) { 1064 if (nztot) { 1065 /* Sort by increasing column numbers, assuming A and B already sorted */ 1066 PetscInt imark = -1; 1067 if (v) { 1068 *v = v_p = mat->rowvalues; 1069 for (i=0; i<nzB; i++) { 1070 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1071 else break; 1072 } 1073 imark = i; 1074 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1075 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1076 } 1077 if (idx) { 1078 *idx = idx_p = mat->rowindices; 1079 if (imark > -1) { 1080 for (i=0; i<imark; i++) { 1081 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1082 } 1083 } else { 1084 for (i=0; i<nzB; i++) { 1085 if (cmap[cworkB[i]/bs] < cstart) 1086 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1087 else break; 1088 } 1089 imark = i; 1090 } 1091 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1092 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1093 } 1094 } else { 1095 if (idx) *idx = 0; 1096 if (v) *v = 0; 1097 } 1098 } 1099 *nz = nztot; 1100 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1101 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1102 PetscFunctionReturn(0); 1103 } 1104 1105 #undef __FUNCT__ 1106 #define __FUNCT__ "MatRestoreRow_MPISBAIJ" 1107 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1108 { 1109 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1110 1111 PetscFunctionBegin; 1112 if (!baij->getrowactive) { 1113 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1114 } 1115 baij->getrowactive = PETSC_FALSE; 1116 PetscFunctionReturn(0); 1117 } 1118 1119 #undef __FUNCT__ 1120 #define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ" 1121 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A) 1122 { 1123 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1124 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1125 1126 PetscFunctionBegin; 1127 aA->getrow_utriangular = PETSC_TRUE; 1128 PetscFunctionReturn(0); 1129 } 1130 #undef __FUNCT__ 1131 #define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ" 1132 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A) 1133 { 1134 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1135 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1136 1137 PetscFunctionBegin; 1138 aA->getrow_utriangular = PETSC_FALSE; 1139 PetscFunctionReturn(0); 1140 } 1141 1142 #undef __FUNCT__ 1143 #define __FUNCT__ "MatRealPart_MPISBAIJ" 1144 PetscErrorCode MatRealPart_MPISBAIJ(Mat A) 1145 { 1146 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1147 PetscErrorCode ierr; 1148 1149 PetscFunctionBegin; 1150 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1151 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1152 PetscFunctionReturn(0); 1153 } 1154 1155 #undef __FUNCT__ 1156 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ" 1157 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A) 1158 { 1159 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1160 PetscErrorCode ierr; 1161 1162 PetscFunctionBegin; 1163 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1164 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1165 PetscFunctionReturn(0); 1166 } 1167 1168 #undef __FUNCT__ 1169 #define __FUNCT__ "MatZeroEntries_MPISBAIJ" 1170 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A) 1171 { 1172 Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data; 1173 PetscErrorCode ierr; 1174 1175 PetscFunctionBegin; 1176 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1177 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1178 PetscFunctionReturn(0); 1179 } 1180 1181 #undef __FUNCT__ 1182 #define __FUNCT__ "MatGetInfo_MPISBAIJ" 1183 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1184 { 1185 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data; 1186 Mat A = a->A,B = a->B; 1187 PetscErrorCode ierr; 1188 PetscReal isend[5],irecv[5]; 1189 1190 PetscFunctionBegin; 1191 info->block_size = (PetscReal)matin->rmap.bs; 1192 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1193 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1194 isend[3] = info->memory; isend[4] = info->mallocs; 1195 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1196 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1197 isend[3] += info->memory; isend[4] += info->mallocs; 1198 if (flag == MAT_LOCAL) { 1199 info->nz_used = isend[0]; 1200 info->nz_allocated = isend[1]; 1201 info->nz_unneeded = isend[2]; 1202 info->memory = isend[3]; 1203 info->mallocs = isend[4]; 1204 } else if (flag == MAT_GLOBAL_MAX) { 1205 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1206 info->nz_used = irecv[0]; 1207 info->nz_allocated = irecv[1]; 1208 info->nz_unneeded = irecv[2]; 1209 info->memory = irecv[3]; 1210 info->mallocs = irecv[4]; 1211 } else if (flag == MAT_GLOBAL_SUM) { 1212 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1213 info->nz_used = irecv[0]; 1214 info->nz_allocated = irecv[1]; 1215 info->nz_unneeded = irecv[2]; 1216 info->memory = irecv[3]; 1217 info->mallocs = irecv[4]; 1218 } else { 1219 SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1220 } 1221 info->rows_global = (PetscReal)A->rmap.N; 1222 info->columns_global = (PetscReal)A->cmap.N; 1223 info->rows_local = (PetscReal)A->rmap.N; 1224 info->columns_local = (PetscReal)A->cmap.N; 1225 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1226 info->fill_ratio_needed = 0; 1227 info->factor_mallocs = 0; 1228 PetscFunctionReturn(0); 1229 } 1230 1231 #undef __FUNCT__ 1232 #define __FUNCT__ "MatSetOption_MPISBAIJ" 1233 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op) 1234 { 1235 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1236 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1237 PetscErrorCode ierr; 1238 1239 PetscFunctionBegin; 1240 switch (op) { 1241 case MAT_NO_NEW_NONZERO_LOCATIONS: 1242 case MAT_YES_NEW_NONZERO_LOCATIONS: 1243 case MAT_COLUMNS_UNSORTED: 1244 case MAT_COLUMNS_SORTED: 1245 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1246 case MAT_KEEP_ZEROED_ROWS: 1247 case MAT_NEW_NONZERO_LOCATION_ERR: 1248 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1249 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1250 break; 1251 case MAT_ROW_ORIENTED: 1252 a->roworiented = PETSC_TRUE; 1253 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1254 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1255 break; 1256 case MAT_ROWS_SORTED: 1257 case MAT_ROWS_UNSORTED: 1258 case MAT_YES_NEW_DIAGONALS: 1259 ierr = PetscInfo(A,"Option ignored\n");CHKERRQ(ierr); 1260 break; 1261 case MAT_COLUMN_ORIENTED: 1262 a->roworiented = PETSC_FALSE; 1263 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1264 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1265 break; 1266 case MAT_IGNORE_OFF_PROC_ENTRIES: 1267 a->donotstash = PETSC_TRUE; 1268 break; 1269 case MAT_NO_NEW_DIAGONALS: 1270 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1271 case MAT_USE_HASH_TABLE: 1272 a->ht_flag = PETSC_TRUE; 1273 break; 1274 case MAT_NOT_SYMMETRIC: 1275 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 1276 case MAT_HERMITIAN: 1277 SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric"); 1278 case MAT_SYMMETRIC: 1279 case MAT_STRUCTURALLY_SYMMETRIC: 1280 case MAT_NOT_HERMITIAN: 1281 case MAT_SYMMETRY_ETERNAL: 1282 case MAT_NOT_SYMMETRY_ETERNAL: 1283 break; 1284 case MAT_IGNORE_LOWER_TRIANGULAR: 1285 aA->ignore_ltriangular = PETSC_TRUE; 1286 break; 1287 case MAT_ERROR_LOWER_TRIANGULAR: 1288 aA->ignore_ltriangular = PETSC_FALSE; 1289 break; 1290 case MAT_GETROW_UPPERTRIANGULAR: 1291 aA->getrow_utriangular = PETSC_TRUE; 1292 break; 1293 default: 1294 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1295 } 1296 PetscFunctionReturn(0); 1297 } 1298 1299 #undef __FUNCT__ 1300 #define __FUNCT__ "MatTranspose_MPISBAIJ" 1301 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,Mat *B) 1302 { 1303 PetscErrorCode ierr; 1304 PetscFunctionBegin; 1305 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 1306 PetscFunctionReturn(0); 1307 } 1308 1309 #undef __FUNCT__ 1310 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ" 1311 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr) 1312 { 1313 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1314 Mat a=baij->A, b=baij->B; 1315 PetscErrorCode ierr; 1316 PetscInt nv,m,n; 1317 PetscTruth flg; 1318 1319 PetscFunctionBegin; 1320 if (ll != rr){ 1321 ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr); 1322 if (!flg) 1323 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n"); 1324 } 1325 if (!ll) PetscFunctionReturn(0); 1326 1327 ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr); 1328 if (m != n) SETERRQ2(PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n); 1329 1330 ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr); 1331 if (nv!=n) SETERRQ(PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size"); 1332 1333 ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1334 1335 /* left diagonalscale the off-diagonal part */ 1336 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1337 1338 /* scale the diagonal part */ 1339 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1340 1341 /* right diagonalscale the off-diagonal part */ 1342 ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1343 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1344 PetscFunctionReturn(0); 1345 } 1346 1347 #undef __FUNCT__ 1348 #define __FUNCT__ "MatPrintHelp_MPISBAIJ" 1349 PetscErrorCode MatPrintHelp_MPISBAIJ(Mat A) 1350 { 1351 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1352 MPI_Comm comm = A->comm; 1353 static PetscTruth called = PETSC_FALSE; 1354 PetscErrorCode ierr; 1355 1356 PetscFunctionBegin; 1357 if (!a->rank) { 1358 ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr); 1359 } 1360 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1361 ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr); 1362 ierr = (*PetscHelpPrintf)(comm," -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr); 1363 PetscFunctionReturn(0); 1364 } 1365 1366 #undef __FUNCT__ 1367 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ" 1368 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A) 1369 { 1370 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1371 PetscErrorCode ierr; 1372 1373 PetscFunctionBegin; 1374 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1375 PetscFunctionReturn(0); 1376 } 1377 1378 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *); 1379 1380 #undef __FUNCT__ 1381 #define __FUNCT__ "MatEqual_MPISBAIJ" 1382 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag) 1383 { 1384 Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data; 1385 Mat a,b,c,d; 1386 PetscTruth flg; 1387 PetscErrorCode ierr; 1388 1389 PetscFunctionBegin; 1390 a = matA->A; b = matA->B; 1391 c = matB->A; d = matB->B; 1392 1393 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1394 if (flg) { 1395 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1396 } 1397 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1398 PetscFunctionReturn(0); 1399 } 1400 1401 #undef __FUNCT__ 1402 #define __FUNCT__ "MatCopy_MPISBAIJ" 1403 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str) 1404 { 1405 PetscErrorCode ierr; 1406 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1407 Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data; 1408 1409 PetscFunctionBegin; 1410 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1411 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1412 ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr); 1413 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1414 ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr); 1415 } else { 1416 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1417 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1418 } 1419 PetscFunctionReturn(0); 1420 } 1421 1422 #undef __FUNCT__ 1423 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ" 1424 PetscErrorCode MatSetUpPreallocation_MPISBAIJ(Mat A) 1425 { 1426 PetscErrorCode ierr; 1427 1428 PetscFunctionBegin; 1429 ierr = MatMPISBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1430 PetscFunctionReturn(0); 1431 } 1432 1433 #include "petscblaslapack.h" 1434 #undef __FUNCT__ 1435 #define __FUNCT__ "MatAXPY_MPISBAIJ" 1436 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1437 { 1438 PetscErrorCode ierr; 1439 Mat_MPISBAIJ *xx=(Mat_MPISBAIJ *)X->data,*yy=(Mat_MPISBAIJ *)Y->data; 1440 PetscBLASInt bnz,one=1; 1441 Mat_SeqSBAIJ *xa,*ya; 1442 Mat_SeqBAIJ *xb,*yb; 1443 1444 PetscFunctionBegin; 1445 if (str == SAME_NONZERO_PATTERN) { 1446 PetscScalar alpha = a; 1447 xa = (Mat_SeqSBAIJ *)xx->A->data; 1448 ya = (Mat_SeqSBAIJ *)yy->A->data; 1449 bnz = (PetscBLASInt)xa->nz; 1450 BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one); 1451 xb = (Mat_SeqBAIJ *)xx->B->data; 1452 yb = (Mat_SeqBAIJ *)yy->B->data; 1453 bnz = (PetscBLASInt)xb->nz; 1454 BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one); 1455 } else { 1456 ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr); 1457 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1458 ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr); 1459 } 1460 PetscFunctionReturn(0); 1461 } 1462 1463 #undef __FUNCT__ 1464 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ" 1465 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[]) 1466 { 1467 PetscErrorCode ierr; 1468 PetscInt i; 1469 PetscTruth flg; 1470 1471 PetscFunctionBegin; 1472 for (i=0; i<n; i++) { 1473 ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr); 1474 if (!flg) { 1475 SETERRQ(PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices"); 1476 } 1477 } 1478 ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr); 1479 PetscFunctionReturn(0); 1480 } 1481 1482 1483 /* -------------------------------------------------------------------*/ 1484 static struct _MatOps MatOps_Values = { 1485 MatSetValues_MPISBAIJ, 1486 MatGetRow_MPISBAIJ, 1487 MatRestoreRow_MPISBAIJ, 1488 MatMult_MPISBAIJ, 1489 /* 4*/ MatMultAdd_MPISBAIJ, 1490 MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */ 1491 MatMultAdd_MPISBAIJ, 1492 0, 1493 0, 1494 0, 1495 /*10*/ 0, 1496 0, 1497 0, 1498 MatRelax_MPISBAIJ, 1499 MatTranspose_MPISBAIJ, 1500 /*15*/ MatGetInfo_MPISBAIJ, 1501 MatEqual_MPISBAIJ, 1502 MatGetDiagonal_MPISBAIJ, 1503 MatDiagonalScale_MPISBAIJ, 1504 MatNorm_MPISBAIJ, 1505 /*20*/ MatAssemblyBegin_MPISBAIJ, 1506 MatAssemblyEnd_MPISBAIJ, 1507 0, 1508 MatSetOption_MPISBAIJ, 1509 MatZeroEntries_MPISBAIJ, 1510 /*25*/ 0, 1511 0, 1512 0, 1513 0, 1514 0, 1515 /*30*/ MatSetUpPreallocation_MPISBAIJ, 1516 0, 1517 0, 1518 0, 1519 0, 1520 /*35*/ MatDuplicate_MPISBAIJ, 1521 0, 1522 0, 1523 0, 1524 0, 1525 /*40*/ MatAXPY_MPISBAIJ, 1526 MatGetSubMatrices_MPISBAIJ, 1527 MatIncreaseOverlap_MPISBAIJ, 1528 MatGetValues_MPISBAIJ, 1529 MatCopy_MPISBAIJ, 1530 /*45*/ MatPrintHelp_MPISBAIJ, 1531 MatScale_MPISBAIJ, 1532 0, 1533 0, 1534 0, 1535 /*50*/ 0, 1536 0, 1537 0, 1538 0, 1539 0, 1540 /*55*/ 0, 1541 0, 1542 MatSetUnfactored_MPISBAIJ, 1543 0, 1544 MatSetValuesBlocked_MPISBAIJ, 1545 /*60*/ 0, 1546 0, 1547 0, 1548 0, 1549 0, 1550 /*65*/ 0, 1551 0, 1552 0, 1553 0, 1554 0, 1555 /*70*/ MatGetRowMax_MPISBAIJ, 1556 0, 1557 0, 1558 0, 1559 0, 1560 /*75*/ 0, 1561 0, 1562 0, 1563 0, 1564 0, 1565 /*80*/ 0, 1566 0, 1567 0, 1568 0, 1569 MatLoad_MPISBAIJ, 1570 /*85*/ 0, 1571 0, 1572 0, 1573 0, 1574 0, 1575 /*90*/ 0, 1576 0, 1577 0, 1578 0, 1579 0, 1580 /*95*/ 0, 1581 0, 1582 0, 1583 0, 1584 0, 1585 /*100*/0, 1586 0, 1587 0, 1588 0, 1589 0, 1590 /*105*/0, 1591 MatRealPart_MPISBAIJ, 1592 MatImaginaryPart_MPISBAIJ, 1593 MatGetRowUpperTriangular_MPISBAIJ, 1594 MatRestoreRowUpperTriangular_MPISBAIJ 1595 }; 1596 1597 1598 EXTERN_C_BEGIN 1599 #undef __FUNCT__ 1600 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ" 1601 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 1602 { 1603 PetscFunctionBegin; 1604 *a = ((Mat_MPISBAIJ *)A->data)->A; 1605 *iscopy = PETSC_FALSE; 1606 PetscFunctionReturn(0); 1607 } 1608 EXTERN_C_END 1609 1610 EXTERN_C_BEGIN 1611 #undef __FUNCT__ 1612 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ" 1613 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz) 1614 { 1615 Mat_MPISBAIJ *b; 1616 PetscErrorCode ierr; 1617 PetscInt i,mbs,Mbs; 1618 1619 PetscFunctionBegin; 1620 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1621 1622 if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive"); 1623 if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3; 1624 if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1; 1625 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 1626 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 1627 1628 B->rmap.bs = B->cmap.bs = bs; 1629 ierr = PetscMapInitialize(B->comm,&B->rmap);CHKERRQ(ierr); 1630 ierr = PetscMapInitialize(B->comm,&B->cmap);CHKERRQ(ierr); 1631 1632 if (d_nnz) { 1633 for (i=0; i<B->rmap.n/bs; i++) { 1634 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 1635 } 1636 } 1637 if (o_nnz) { 1638 for (i=0; i<B->rmap.n/bs; i++) { 1639 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 1640 } 1641 } 1642 B->preallocated = PETSC_TRUE; 1643 1644 b = (Mat_MPISBAIJ*)B->data; 1645 mbs = B->rmap.n/bs; 1646 Mbs = B->rmap.N/bs; 1647 if (mbs*bs != B->rmap.n) { 1648 SETERRQ2(PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap.N,bs); 1649 } 1650 1651 B->rmap.bs = bs; 1652 b->bs2 = bs*bs; 1653 b->mbs = mbs; 1654 b->nbs = mbs; 1655 b->Mbs = Mbs; 1656 b->Nbs = Mbs; 1657 1658 for (i=0; i<=b->size; i++) { 1659 b->rangebs[i] = B->rmap.range[i]/bs; 1660 } 1661 b->rstartbs = B->rmap.rstart/bs; 1662 b->rendbs = B->rmap.rend/bs; 1663 1664 b->cstartbs = B->cmap.rstart/bs; 1665 b->cendbs = B->cmap.rend/bs; 1666 1667 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 1668 ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr); 1669 ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr); 1670 ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 1671 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 1672 1673 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 1674 ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr); 1675 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 1676 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 1677 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 1678 1679 /* build cache for off array entries formed */ 1680 ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr); 1681 1682 PetscFunctionReturn(0); 1683 } 1684 EXTERN_C_END 1685 1686 /*MC 1687 MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices, 1688 based on block compressed sparse row format. Only the upper triangular portion of the matrix is stored. 1689 1690 Options Database Keys: 1691 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions() 1692 1693 Level: beginner 1694 1695 .seealso: MatCreateMPISBAIJ 1696 M*/ 1697 1698 EXTERN_C_BEGIN 1699 #undef __FUNCT__ 1700 #define __FUNCT__ "MatCreate_MPISBAIJ" 1701 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPISBAIJ(Mat B) 1702 { 1703 Mat_MPISBAIJ *b; 1704 PetscErrorCode ierr; 1705 PetscTruth flg; 1706 1707 PetscFunctionBegin; 1708 1709 ierr = PetscNew(Mat_MPISBAIJ,&b);CHKERRQ(ierr); 1710 B->data = (void*)b; 1711 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1712 1713 B->ops->destroy = MatDestroy_MPISBAIJ; 1714 B->ops->view = MatView_MPISBAIJ; 1715 B->mapping = 0; 1716 B->factor = 0; 1717 B->assembled = PETSC_FALSE; 1718 1719 B->insertmode = NOT_SET_VALUES; 1720 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 1721 ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr); 1722 1723 /* build local table of row and column ownerships */ 1724 ierr = PetscMalloc((b->size+2)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr); 1725 1726 /* build cache for off array entries formed */ 1727 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 1728 b->donotstash = PETSC_FALSE; 1729 b->colmap = PETSC_NULL; 1730 b->garray = PETSC_NULL; 1731 b->roworiented = PETSC_TRUE; 1732 1733 #if defined(PETSC_USE_MAT_SINGLE) 1734 /* stuff for MatSetValues_XXX in single precision */ 1735 b->setvalueslen = 0; 1736 b->setvaluescopy = PETSC_NULL; 1737 #endif 1738 1739 /* stuff used in block assembly */ 1740 b->barray = 0; 1741 1742 /* stuff used for matrix vector multiply */ 1743 b->lvec = 0; 1744 b->Mvctx = 0; 1745 b->slvec0 = 0; 1746 b->slvec0b = 0; 1747 b->slvec1 = 0; 1748 b->slvec1a = 0; 1749 b->slvec1b = 0; 1750 b->sMvctx = 0; 1751 1752 /* stuff for MatGetRow() */ 1753 b->rowindices = 0; 1754 b->rowvalues = 0; 1755 b->getrowactive = PETSC_FALSE; 1756 1757 /* hash table stuff */ 1758 b->ht = 0; 1759 b->hd = 0; 1760 b->ht_size = 0; 1761 b->ht_flag = PETSC_FALSE; 1762 b->ht_fact = 0; 1763 b->ht_total_ct = 0; 1764 b->ht_insert_ct = 0; 1765 1766 ierr = PetscOptionsHasName(B->prefix,"-mat_use_hash_table",&flg);CHKERRQ(ierr); 1767 if (flg) { 1768 PetscReal fact = 1.39; 1769 ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr); 1770 ierr = PetscOptionsGetReal(B->prefix,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr); 1771 if (fact <= 1.0) fact = 1.39; 1772 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 1773 ierr = PetscInfo1(0,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 1774 } 1775 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1776 "MatStoreValues_MPISBAIJ", 1777 MatStoreValues_MPISBAIJ);CHKERRQ(ierr); 1778 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1779 "MatRetrieveValues_MPISBAIJ", 1780 MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr); 1781 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 1782 "MatGetDiagonalBlock_MPISBAIJ", 1783 MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr); 1784 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C", 1785 "MatMPISBAIJSetPreallocation_MPISBAIJ", 1786 MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr); 1787 B->symmetric = PETSC_TRUE; 1788 B->structurally_symmetric = PETSC_TRUE; 1789 B->symmetric_set = PETSC_TRUE; 1790 B->structurally_symmetric_set = PETSC_TRUE; 1791 PetscFunctionReturn(0); 1792 } 1793 EXTERN_C_END 1794 1795 /*MC 1796 MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices. 1797 1798 This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator, 1799 and MATMPISBAIJ otherwise. 1800 1801 Options Database Keys: 1802 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions() 1803 1804 Level: beginner 1805 1806 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ 1807 M*/ 1808 1809 EXTERN_C_BEGIN 1810 #undef __FUNCT__ 1811 #define __FUNCT__ "MatCreate_SBAIJ" 1812 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SBAIJ(Mat A) 1813 { 1814 PetscErrorCode ierr; 1815 PetscMPIInt size; 1816 1817 PetscFunctionBegin; 1818 ierr = PetscObjectChangeTypeName((PetscObject)A,MATSBAIJ);CHKERRQ(ierr); 1819 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 1820 if (size == 1) { 1821 ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr); 1822 } else { 1823 ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 1824 } 1825 PetscFunctionReturn(0); 1826 } 1827 EXTERN_C_END 1828 1829 #undef __FUNCT__ 1830 #define __FUNCT__ "MatMPISBAIJSetPreallocation" 1831 /*@C 1832 MatMPISBAIJSetPreallocation - For good matrix assembly performance 1833 the user should preallocate the matrix storage by setting the parameters 1834 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1835 performance can be increased by more than a factor of 50. 1836 1837 Collective on Mat 1838 1839 Input Parameters: 1840 + A - the matrix 1841 . bs - size of blockk 1842 . d_nz - number of block nonzeros per block row in diagonal portion of local 1843 submatrix (same for all local rows) 1844 . d_nnz - array containing the number of block nonzeros in the various block rows 1845 in the upper triangular and diagonal part of the in diagonal portion of the local 1846 (possibly different for each block row) or PETSC_NULL. You must leave room 1847 for the diagonal entry even if it is zero. 1848 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1849 submatrix (same for all local rows). 1850 - o_nnz - array containing the number of nonzeros in the various block rows of the 1851 off-diagonal portion of the local submatrix (possibly different for 1852 each block row) or PETSC_NULL. 1853 1854 1855 Options Database Keys: 1856 . -mat_no_unroll - uses code that does not unroll the loops in the 1857 block calculations (much slower) 1858 . -mat_block_size - size of the blocks to use 1859 1860 Notes: 1861 1862 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1863 than it must be used on all processors that share the object for that argument. 1864 1865 If the *_nnz parameter is given then the *_nz parameter is ignored 1866 1867 Storage Information: 1868 For a square global matrix we define each processor's diagonal portion 1869 to be its local rows and the corresponding columns (a square submatrix); 1870 each processor's off-diagonal portion encompasses the remainder of the 1871 local matrix (a rectangular submatrix). 1872 1873 The user can specify preallocated storage for the diagonal part of 1874 the local submatrix with either d_nz or d_nnz (not both). Set 1875 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1876 memory allocation. Likewise, specify preallocated storage for the 1877 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1878 1879 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1880 the figure below we depict these three local rows and all columns (0-11). 1881 1882 .vb 1883 0 1 2 3 4 5 6 7 8 9 10 11 1884 ------------------- 1885 row 3 | o o o d d d o o o o o o 1886 row 4 | o o o d d d o o o o o o 1887 row 5 | o o o d d d o o o o o o 1888 ------------------- 1889 .ve 1890 1891 Thus, any entries in the d locations are stored in the d (diagonal) 1892 submatrix, and any entries in the o locations are stored in the 1893 o (off-diagonal) submatrix. Note that the d matrix is stored in 1894 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 1895 1896 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 1897 plus the diagonal part of the d matrix, 1898 and o_nz should indicate the number of block nonzeros per row in the o matrix. 1899 In general, for PDE problems in which most nonzeros are near the diagonal, 1900 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1901 or you will get TERRIBLE performance; see the users' manual chapter on 1902 matrices. 1903 1904 Level: intermediate 1905 1906 .keywords: matrix, block, aij, compressed row, sparse, parallel 1907 1908 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1909 @*/ 1910 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 1911 { 1912 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 1913 1914 PetscFunctionBegin; 1915 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 1916 if (f) { 1917 ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 1918 } 1919 PetscFunctionReturn(0); 1920 } 1921 1922 #undef __FUNCT__ 1923 #define __FUNCT__ "MatCreateMPISBAIJ" 1924 /*@C 1925 MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format 1926 (block compressed row). For good matrix assembly performance 1927 the user should preallocate the matrix storage by setting the parameters 1928 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1929 performance can be increased by more than a factor of 50. 1930 1931 Collective on MPI_Comm 1932 1933 Input Parameters: 1934 + comm - MPI communicator 1935 . bs - size of blockk 1936 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1937 This value should be the same as the local size used in creating the 1938 y vector for the matrix-vector product y = Ax. 1939 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1940 This value should be the same as the local size used in creating the 1941 x vector for the matrix-vector product y = Ax. 1942 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 1943 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 1944 . d_nz - number of block nonzeros per block row in diagonal portion of local 1945 submatrix (same for all local rows) 1946 . d_nnz - array containing the number of block nonzeros in the various block rows 1947 in the upper triangular portion of the in diagonal portion of the local 1948 (possibly different for each block block row) or PETSC_NULL. 1949 You must leave room for the diagonal entry even if it is zero. 1950 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1951 submatrix (same for all local rows). 1952 - o_nnz - array containing the number of nonzeros in the various block rows of the 1953 off-diagonal portion of the local submatrix (possibly different for 1954 each block row) or PETSC_NULL. 1955 1956 Output Parameter: 1957 . A - the matrix 1958 1959 Options Database Keys: 1960 . -mat_no_unroll - uses code that does not unroll the loops in the 1961 block calculations (much slower) 1962 . -mat_block_size - size of the blocks to use 1963 . -mat_mpi - use the parallel matrix data structures even on one processor 1964 (defaults to using SeqBAIJ format on one processor) 1965 1966 Notes: 1967 The number of rows and columns must be divisible by blocksize. 1968 1969 The user MUST specify either the local or global matrix dimensions 1970 (possibly both). 1971 1972 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1973 than it must be used on all processors that share the object for that argument. 1974 1975 If the *_nnz parameter is given then the *_nz parameter is ignored 1976 1977 Storage Information: 1978 For a square global matrix we define each processor's diagonal portion 1979 to be its local rows and the corresponding columns (a square submatrix); 1980 each processor's off-diagonal portion encompasses the remainder of the 1981 local matrix (a rectangular submatrix). 1982 1983 The user can specify preallocated storage for the diagonal part of 1984 the local submatrix with either d_nz or d_nnz (not both). Set 1985 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1986 memory allocation. Likewise, specify preallocated storage for the 1987 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1988 1989 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1990 the figure below we depict these three local rows and all columns (0-11). 1991 1992 .vb 1993 0 1 2 3 4 5 6 7 8 9 10 11 1994 ------------------- 1995 row 3 | o o o d d d o o o o o o 1996 row 4 | o o o d d d o o o o o o 1997 row 5 | o o o d d d o o o o o o 1998 ------------------- 1999 .ve 2000 2001 Thus, any entries in the d locations are stored in the d (diagonal) 2002 submatrix, and any entries in the o locations are stored in the 2003 o (off-diagonal) submatrix. Note that the d matrix is stored in 2004 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 2005 2006 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 2007 plus the diagonal part of the d matrix, 2008 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2009 In general, for PDE problems in which most nonzeros are near the diagonal, 2010 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2011 or you will get TERRIBLE performance; see the users' manual chapter on 2012 matrices. 2013 2014 Level: intermediate 2015 2016 .keywords: matrix, block, aij, compressed row, sparse, parallel 2017 2018 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2019 @*/ 2020 2021 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPISBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2022 { 2023 PetscErrorCode ierr; 2024 PetscMPIInt size; 2025 2026 PetscFunctionBegin; 2027 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2028 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2029 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2030 if (size > 1) { 2031 ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr); 2032 ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2033 } else { 2034 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 2035 ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2036 } 2037 PetscFunctionReturn(0); 2038 } 2039 2040 2041 #undef __FUNCT__ 2042 #define __FUNCT__ "MatDuplicate_MPISBAIJ" 2043 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2044 { 2045 Mat mat; 2046 Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data; 2047 PetscErrorCode ierr; 2048 PetscInt len=0,nt,bs=matin->rmap.bs,mbs=oldmat->mbs; 2049 PetscScalar *array; 2050 2051 PetscFunctionBegin; 2052 *newmat = 0; 2053 ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr); 2054 ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr); 2055 ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 2056 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2057 ierr = PetscMapCopy(matin->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr); 2058 ierr = PetscMapCopy(matin->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr); 2059 2060 mat->factor = matin->factor; 2061 mat->preallocated = PETSC_TRUE; 2062 mat->assembled = PETSC_TRUE; 2063 mat->insertmode = NOT_SET_VALUES; 2064 2065 a = (Mat_MPISBAIJ*)mat->data; 2066 a->bs2 = oldmat->bs2; 2067 a->mbs = oldmat->mbs; 2068 a->nbs = oldmat->nbs; 2069 a->Mbs = oldmat->Mbs; 2070 a->Nbs = oldmat->Nbs; 2071 2072 2073 a->size = oldmat->size; 2074 a->rank = oldmat->rank; 2075 a->donotstash = oldmat->donotstash; 2076 a->roworiented = oldmat->roworiented; 2077 a->rowindices = 0; 2078 a->rowvalues = 0; 2079 a->getrowactive = PETSC_FALSE; 2080 a->barray = 0; 2081 a->rstartbs = oldmat->rstartbs; 2082 a->rendbs = oldmat->rendbs; 2083 a->cstartbs = oldmat->cstartbs; 2084 a->cendbs = oldmat->cendbs; 2085 2086 /* hash table stuff */ 2087 a->ht = 0; 2088 a->hd = 0; 2089 a->ht_size = 0; 2090 a->ht_flag = oldmat->ht_flag; 2091 a->ht_fact = oldmat->ht_fact; 2092 a->ht_total_ct = 0; 2093 a->ht_insert_ct = 0; 2094 2095 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr); 2096 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 2097 ierr = MatStashCreate_Private(matin->comm,matin->rmap.bs,&mat->bstash);CHKERRQ(ierr); 2098 if (oldmat->colmap) { 2099 #if defined (PETSC_USE_CTABLE) 2100 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2101 #else 2102 ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2103 ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2104 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2105 #endif 2106 } else a->colmap = 0; 2107 2108 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2109 ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2110 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2111 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 2112 } else a->garray = 0; 2113 2114 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2115 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2116 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2117 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2118 2119 ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr); 2120 ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr); 2121 ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr); 2122 ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr); 2123 2124 ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr); 2125 ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr); 2126 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr); 2127 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr); 2128 ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr); 2129 ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr); 2130 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr); 2131 ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr); 2132 ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr); 2133 ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr); 2134 ierr = PetscLogObjectParent(mat,a->slvec0b);CHKERRQ(ierr); 2135 ierr = PetscLogObjectParent(mat,a->slvec1a);CHKERRQ(ierr); 2136 ierr = PetscLogObjectParent(mat,a->slvec1b);CHKERRQ(ierr); 2137 2138 /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */ 2139 ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr); 2140 a->sMvctx = oldmat->sMvctx; 2141 ierr = PetscLogObjectParent(mat,a->sMvctx);CHKERRQ(ierr); 2142 2143 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2144 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2145 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2146 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2147 ierr = PetscFListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr); 2148 *newmat = mat; 2149 PetscFunctionReturn(0); 2150 } 2151 2152 #include "petscsys.h" 2153 2154 #undef __FUNCT__ 2155 #define __FUNCT__ "MatLoad_MPISBAIJ" 2156 PetscErrorCode MatLoad_MPISBAIJ(PetscViewer viewer, MatType type,Mat *newmat) 2157 { 2158 Mat A; 2159 PetscErrorCode ierr; 2160 PetscInt i,nz,j,rstart,rend; 2161 PetscScalar *vals,*buf; 2162 MPI_Comm comm = ((PetscObject)viewer)->comm; 2163 MPI_Status status; 2164 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,*locrowlens; 2165 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2166 PetscInt *procsnz = 0,jj,*mycols,*ibuf; 2167 PetscInt bs=1,Mbs,mbs,extra_rows; 2168 PetscInt *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2169 PetscInt dcount,kmax,k,nzcount,tmp; 2170 int fd; 2171 2172 PetscFunctionBegin; 2173 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2174 2175 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2176 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2177 if (!rank) { 2178 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2179 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2180 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2181 if (header[3] < 0) { 2182 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ"); 2183 } 2184 } 2185 2186 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2187 M = header[1]; N = header[2]; 2188 2189 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2190 2191 /* 2192 This code adds extra rows to make sure the number of rows is 2193 divisible by the blocksize 2194 */ 2195 Mbs = M/bs; 2196 extra_rows = bs - M + bs*(Mbs); 2197 if (extra_rows == bs) extra_rows = 0; 2198 else Mbs++; 2199 if (extra_rows &&!rank) { 2200 ierr = PetscInfo(0,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 2201 } 2202 2203 /* determine ownership of all rows */ 2204 mbs = Mbs/size + ((Mbs % size) > rank); 2205 m = mbs*bs; 2206 ierr = PetscMalloc(2*(size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr); 2207 browners = rowners + size + 1; 2208 ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2209 rowners[0] = 0; 2210 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2211 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2212 rstart = rowners[rank]; 2213 rend = rowners[rank+1]; 2214 2215 /* distribute row lengths to all processors */ 2216 ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscMPIInt),&locrowlens);CHKERRQ(ierr); 2217 if (!rank) { 2218 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2219 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2220 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2221 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr); 2222 for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2223 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2224 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2225 } else { 2226 ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2227 } 2228 2229 if (!rank) { /* procs[0] */ 2230 /* calculate the number of nonzeros on each processor */ 2231 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2232 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2233 for (i=0; i<size; i++) { 2234 for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 2235 procsnz[i] += rowlengths[j]; 2236 } 2237 } 2238 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2239 2240 /* determine max buffer needed and allocate it */ 2241 maxnz = 0; 2242 for (i=0; i<size; i++) { 2243 maxnz = PetscMax(maxnz,procsnz[i]); 2244 } 2245 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2246 2247 /* read in my part of the matrix column indices */ 2248 nz = procsnz[0]; 2249 ierr = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 2250 mycols = ibuf; 2251 if (size == 1) nz -= extra_rows; 2252 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2253 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2254 2255 /* read in every ones (except the last) and ship off */ 2256 for (i=1; i<size-1; i++) { 2257 nz = procsnz[i]; 2258 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2259 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2260 } 2261 /* read in the stuff for the last proc */ 2262 if (size != 1) { 2263 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2264 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2265 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2266 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 2267 } 2268 ierr = PetscFree(cols);CHKERRQ(ierr); 2269 } else { /* procs[i], i>0 */ 2270 /* determine buffer space needed for message */ 2271 nz = 0; 2272 for (i=0; i<m; i++) { 2273 nz += locrowlens[i]; 2274 } 2275 ierr = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 2276 mycols = ibuf; 2277 /* receive message of column indices*/ 2278 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2279 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2280 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2281 } 2282 2283 /* loop over local rows, determining number of off diagonal entries */ 2284 ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 2285 odlens = dlens + (rend-rstart); 2286 ierr = PetscMalloc(3*Mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr); 2287 ierr = PetscMemzero(mask,3*Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2288 masked1 = mask + Mbs; 2289 masked2 = masked1 + Mbs; 2290 rowcount = 0; nzcount = 0; 2291 for (i=0; i<mbs; i++) { 2292 dcount = 0; 2293 odcount = 0; 2294 for (j=0; j<bs; j++) { 2295 kmax = locrowlens[rowcount]; 2296 for (k=0; k<kmax; k++) { 2297 tmp = mycols[nzcount++]/bs; /* block col. index */ 2298 if (!mask[tmp]) { 2299 mask[tmp] = 1; 2300 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */ 2301 else masked1[dcount++] = tmp; /* entry in diag portion */ 2302 } 2303 } 2304 rowcount++; 2305 } 2306 2307 dlens[i] = dcount; /* d_nzz[i] */ 2308 odlens[i] = odcount; /* o_nzz[i] */ 2309 2310 /* zero out the mask elements we set */ 2311 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2312 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2313 } 2314 2315 /* create our matrix */ 2316 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 2317 ierr = MatSetSizes(A,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2318 ierr = MatSetType(A,type);CHKERRQ(ierr); 2319 ierr = MatMPISBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr); 2320 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2321 2322 if (!rank) { 2323 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2324 /* read in my part of the matrix numerical values */ 2325 nz = procsnz[0]; 2326 vals = buf; 2327 mycols = ibuf; 2328 if (size == 1) nz -= extra_rows; 2329 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2330 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2331 2332 /* insert into matrix */ 2333 jj = rstart*bs; 2334 for (i=0; i<m; i++) { 2335 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2336 mycols += locrowlens[i]; 2337 vals += locrowlens[i]; 2338 jj++; 2339 } 2340 2341 /* read in other processors (except the last one) and ship out */ 2342 for (i=1; i<size-1; i++) { 2343 nz = procsnz[i]; 2344 vals = buf; 2345 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2346 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2347 } 2348 /* the last proc */ 2349 if (size != 1){ 2350 nz = procsnz[i] - extra_rows; 2351 vals = buf; 2352 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2353 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2354 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 2355 } 2356 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2357 2358 } else { 2359 /* receive numeric values */ 2360 ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2361 2362 /* receive message of values*/ 2363 vals = buf; 2364 mycols = ibuf; 2365 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2366 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2367 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2368 2369 /* insert into matrix */ 2370 jj = rstart*bs; 2371 for (i=0; i<m; i++) { 2372 ierr = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2373 mycols += locrowlens[i]; 2374 vals += locrowlens[i]; 2375 jj++; 2376 } 2377 } 2378 2379 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2380 ierr = PetscFree(buf);CHKERRQ(ierr); 2381 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2382 ierr = PetscFree(rowners);CHKERRQ(ierr); 2383 ierr = PetscFree(dlens);CHKERRQ(ierr); 2384 ierr = PetscFree(mask);CHKERRQ(ierr); 2385 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2386 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2387 *newmat = A; 2388 PetscFunctionReturn(0); 2389 } 2390 2391 #undef __FUNCT__ 2392 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor" 2393 /*XXXXX@ 2394 MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2395 2396 Input Parameters: 2397 . mat - the matrix 2398 . fact - factor 2399 2400 Collective on Mat 2401 2402 Level: advanced 2403 2404 Notes: 2405 This can also be set by the command line option: -mat_use_hash_table fact 2406 2407 .keywords: matrix, hashtable, factor, HT 2408 2409 .seealso: MatSetOption() 2410 @XXXXX*/ 2411 2412 2413 #undef __FUNCT__ 2414 #define __FUNCT__ "MatGetRowMax_MPISBAIJ" 2415 PetscErrorCode MatGetRowMax_MPISBAIJ(Mat A,Vec v) 2416 { 2417 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 2418 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(a->B)->data; 2419 PetscReal atmp; 2420 PetscReal *work,*svalues,*rvalues; 2421 PetscErrorCode ierr; 2422 PetscInt i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol; 2423 PetscMPIInt rank,size; 2424 PetscInt *rowners_bs,dest,count,source; 2425 PetscScalar *va; 2426 MatScalar *ba; 2427 MPI_Status stat; 2428 2429 PetscFunctionBegin; 2430 ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr); 2431 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2432 2433 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 2434 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 2435 2436 bs = A->rmap.bs; 2437 mbs = a->mbs; 2438 Mbs = a->Mbs; 2439 ba = b->a; 2440 bi = b->i; 2441 bj = b->j; 2442 2443 /* find ownerships */ 2444 rowners_bs = A->rmap.range; 2445 2446 /* each proc creates an array to be distributed */ 2447 ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr); 2448 ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr); 2449 2450 /* row_max for B */ 2451 if (rank != size-1){ 2452 for (i=0; i<mbs; i++) { 2453 ncols = bi[1] - bi[0]; bi++; 2454 brow = bs*i; 2455 for (j=0; j<ncols; j++){ 2456 bcol = bs*(*bj); 2457 for (kcol=0; kcol<bs; kcol++){ 2458 col = bcol + kcol; /* local col index */ 2459 col += rowners_bs[rank+1]; /* global col index */ 2460 for (krow=0; krow<bs; krow++){ 2461 atmp = PetscAbsScalar(*ba); ba++; 2462 row = brow + krow; /* local row index */ 2463 if (PetscRealPart(va[row]) < atmp) va[row] = atmp; 2464 if (work[col] < atmp) work[col] = atmp; 2465 } 2466 } 2467 bj++; 2468 } 2469 } 2470 2471 /* send values to its owners */ 2472 for (dest=rank+1; dest<size; dest++){ 2473 svalues = work + rowners_bs[dest]; 2474 count = rowners_bs[dest+1]-rowners_bs[dest]; 2475 ierr = MPI_Send(svalues,count,MPIU_REAL,dest,rank,A->comm);CHKERRQ(ierr); 2476 } 2477 } 2478 2479 /* receive values */ 2480 if (rank){ 2481 rvalues = work; 2482 count = rowners_bs[rank+1]-rowners_bs[rank]; 2483 for (source=0; source<rank; source++){ 2484 ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,A->comm,&stat);CHKERRQ(ierr); 2485 /* process values */ 2486 for (i=0; i<count; i++){ 2487 if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i]; 2488 } 2489 } 2490 } 2491 2492 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2493 ierr = PetscFree(work);CHKERRQ(ierr); 2494 PetscFunctionReturn(0); 2495 } 2496 2497 #undef __FUNCT__ 2498 #define __FUNCT__ "MatRelax_MPISBAIJ" 2499 PetscErrorCode MatRelax_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2500 { 2501 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2502 PetscErrorCode ierr; 2503 PetscInt mbs=mat->mbs,bs=matin->rmap.bs; 2504 PetscScalar *x,*b,*ptr,zero=0.0; 2505 Vec bb1; 2506 2507 PetscFunctionBegin; 2508 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 2509 if (bs > 1) 2510 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2511 2512 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2513 if ( flag & SOR_ZERO_INITIAL_GUESS ) { 2514 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2515 its--; 2516 } 2517 2518 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2519 while (its--){ 2520 2521 /* lower triangular part: slvec0b = - B^T*xx */ 2522 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2523 2524 /* copy xx into slvec0a */ 2525 ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2526 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2527 ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2528 ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2529 2530 ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr); 2531 2532 /* copy bb into slvec1a */ 2533 ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2534 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 2535 ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2536 ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2537 2538 /* set slvec1b = 0 */ 2539 ierr = VecSet(mat->slvec1b,zero);CHKERRQ(ierr); 2540 2541 ierr = VecScatterBegin(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr); 2542 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2543 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 2544 ierr = VecScatterEnd(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr); 2545 2546 /* upper triangular part: bb1 = bb1 - B*x */ 2547 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr); 2548 2549 /* local diagonal sweep */ 2550 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2551 } 2552 ierr = VecDestroy(bb1);CHKERRQ(ierr); 2553 } else { 2554 SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2555 } 2556 PetscFunctionReturn(0); 2557 } 2558 2559 #undef __FUNCT__ 2560 #define __FUNCT__ "MatRelax_MPISBAIJ_2comm" 2561 PetscErrorCode MatRelax_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2562 { 2563 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2564 PetscErrorCode ierr; 2565 Vec lvec1,bb1; 2566 2567 PetscFunctionBegin; 2568 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 2569 if (matin->rmap.bs > 1) 2570 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2571 2572 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2573 if ( flag & SOR_ZERO_INITIAL_GUESS ) { 2574 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2575 its--; 2576 } 2577 2578 ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr); 2579 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2580 while (its--){ 2581 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 2582 2583 /* lower diagonal part: bb1 = bb - B^T*xx */ 2584 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr); 2585 ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr); 2586 2587 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 2588 ierr = VecCopy(bb,bb1);CHKERRQ(ierr); 2589 ierr = VecScatterBegin(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr); 2590 2591 /* upper diagonal part: bb1 = bb1 - B*x */ 2592 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2593 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 2594 2595 ierr = VecScatterEnd(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr); 2596 2597 /* diagonal sweep */ 2598 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2599 } 2600 ierr = VecDestroy(lvec1);CHKERRQ(ierr); 2601 ierr = VecDestroy(bb1);CHKERRQ(ierr); 2602 } else { 2603 SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2604 } 2605 PetscFunctionReturn(0); 2606 } 2607 2608