#define PETSCMAT_DLL

#include "src/mat/impls/baij/mpi/mpibaij.h"   /*I  "petscmat.h"  I*/
#include "mpisbaij.h"
#include "src/mat/impls/sbaij/seq/sbaij.h"

EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat);
EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat);
EXTERN PetscErrorCode DisAssemble_MPISBAIJ(Mat);
EXTERN PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt);
EXTERN PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar[]);
EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar[]);
EXTERN PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
EXTERN PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
EXTERN PetscErrorCode MatPrintHelp_SeqSBAIJ(Mat);
EXTERN PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*);
EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar*);
EXTERN PetscErrorCode MatGetRowMax_MPISBAIJ(Mat,Vec);
EXTERN PetscErrorCode MatRelax_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec);

/*  UGLY, ugly, ugly
   When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does
   not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and
   inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ()
   converts the entries into single precision and then calls ..._MatScalar() to put them
   into the single precision data structures.
*/
#if defined(PETSC_USE_MAT_SINGLE)
EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
EXTERN PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
EXTERN PetscErrorCode MatSetValues_MPISBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
#else
#define MatSetValuesBlocked_SeqSBAIJ_MatScalar     MatSetValuesBlocked_SeqSBAIJ
#define MatSetValues_MPISBAIJ_MatScalar            MatSetValues_MPISBAIJ
#define MatSetValuesBlocked_MPISBAIJ_MatScalar     MatSetValuesBlocked_MPISBAIJ
#define MatSetValues_MPISBAIJ_HT_MatScalar         MatSetValues_MPISBAIJ_HT
#define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar  MatSetValuesBlocked_MPISBAIJ_HT
#endif

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPISBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
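
/*
   Typical use of the store/retrieve pair above (an illustrative sketch, not
   part of the library): freeze the nonzero pattern, save the numerical
   values before they are overwritten, and restore them later.  Error
   checking is abbreviated.

.vb
   MatSetOption(mat,MAT_NO_NEW_NONZERO_LOCATIONS);
   MatStoreValues(mat);      // saves the values of both the A and B parts
   // ... operations that change values but not the nonzero pattern ...
   MatRetrieveValues(mat);   // restores the saved values
.ve
*/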

#define CHUNKSIZE 10

#define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else                    *bap  = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(a,bs2,nrow,brow,bcol,rmax,aa,ai,aj,a->mbs,rp,ap,aimax,a->nonew); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \
    rp[_i]                      = bcol; \
    ap[bs2*_i + bs*cidx + ridx] = value; \
    a_noinsert:; \
    ailen[brow] = nrow; \
}
#ifndef MatSetValues_SeqBAIJ_B_Private
#define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else                    *bap  = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(b,bs2,nrow,brow,bcol,rmax,ba,bi,bj,b->mbs,rp,ap,bimax,b->nonew); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \
    rp[_i]                      = bcol; \
    ap[bs2*_i + bs*cidx + ridx] = value; \
    b_noinsert:; \
    bilen[brow] = nrow; \
}
#endif

#if defined(PETSC_USE_MAT_SINGLE)
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPISBAIJ"
PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,N = m*n;
  MatScalar      *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen = N;
  }
  vsingle = b->setvaluescopy;

  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,N = m*n*b->bs2;
  MatScalar      *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen = N;
  }
  vsingle = b->setvaluescopy;
  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif

/* Only add/insert a(i,j) with i<=j (blocks).
   Any a(i,j) with i>j input by user is ignored.
*/
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPISBAIJ_MatScalar"
PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  MatScalar      value;
  PetscTruth     roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap.rstart;
  PetscInt       rend_orig=mat->rmap.rend,cstart_orig=mat->cmap.rstart;
  PetscInt       cend_orig=mat->cmap.rend,bs=mat->rmap.bs;

  /* Some Variables required in the macro */
  Mat          A = baij->A;
  Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data;
  PetscInt     *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar    *aa=a->a;

  Mat         B = baij->B;
  Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data;
  PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba=b->a;

  PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar *ap,*bap;

  /* for stash */
  PetscInt  n_loc, *in_loc = 0;
  MatScalar *v_loc = 0;

  PetscFunctionBegin;

  if (!baij->donotstash) {
    ierr = PetscMalloc(n*sizeof(PetscInt),&in_loc);CHKERRQ(ierr);
    ierr = PetscMalloc(n*sizeof(MatScalar),&v_loc);CHKERRQ(ierr);
  }

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
      row = im[i] - rstart_orig;                     /* local row index */
      for (j=0; j<n; j++) {
        if (im[i]/bs > in[j]/bs) {
          if (a->ignore_ltriangular) {
            continue;   /* ignore lower triangular blocks */
          } else {
            SETERRQ(PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR)");
          }
        }
        if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */
          col  = in[j] - cstart_orig;                    /* local col index */
          brow = row/bs; bcol = col/bs;
          if (brow > bcol) continue; /* ignore lower triangular blocks of A */
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
#endif
        else { /* off-diag entry (B) */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ*)(B)->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              ba = b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else { /* off processor entry */
      if (!baij->donotstash) {
        n_loc = 0;
        for (j=0; j<n; j++) {
          if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
          in_loc[n_loc] = in[j];
          if (roworiented) {
            v_loc[n_loc] = v[i*n+j];
          } else {
            v_loc[n_loc] = v[j*m+i];
          }
          n_loc++;
        }
        ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr);
      }
    }
  }

  if (!baij->donotstash) {
    ierr = PetscFree(in_loc);CHKERRQ(ierr);
    ierr = PetscFree(v_loc);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
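
/*
   Caller-side sketch (illustrative, with made-up indices): because only the
   upper triangle is stored, users of MatSetValues() on this format should
   pass entries with row index <= column index; lower triangular input is
   dropped (or flagged as an error) by the code above.

.vb
   PetscInt    i = 3, j = 7;   // global indices with i <= j
   PetscScalar v = 5.0;
   MatSetValues(mat,1,&i,1,&j,&v,INSERT_VALUES);
   // the mirror entry (7,3) is implied by symmetry and must not be set
.ve
*/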

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_MatScalar"
PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray=baij->barray;
  PetscTruth      roworiented = baij->roworiented;
  PetscErrorCode  ierr;
  PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt        rend=baij->rendbs,cstart=baij->rstartbs,stepval;
  PetscInt        cend=baij->rendbs,bs=mat->rmap.bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++ = *value++;
            }
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined (PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPISBAIJ"
PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs=mat->rmap.bs,i,j,bsrstart = mat->rmap.rstart,bsrend = mat->rmap.rend;
  PetscInt       bscstart = mat->cmap.rstart,bscend = mat->cmap.rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
    if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]);
        if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          col  = idxn[j] - bscstart;
          ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPISBAIJ"
PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscReal      sum[2],*lnorm2;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
      ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of norm(A) */
      ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;            /* square of norm(B) */
      ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = sqrt(sum[0] + 2*sum[1]);
      ierr = PetscFree(lnorm2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
      Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
      Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
      PetscReal    *rsum,*rsum2,vabs;
      PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
      PetscInt     brow,bcol,col,bs=baij->A->rmap.bs,row,grow,gcol,mbs=amat->mbs;
      MatScalar    *v;

      ierr  = PetscMalloc((2*mat->cmap.N+1)*sizeof(PetscReal),&rsum);CHKERRQ(ierr);
      rsum2 = rsum + mat->cmap.N;
      ierr  = PetscMemzero(rsum,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr);
      /* Amat */
      v = amat->a; jj = amat->j;
      for (brow=0; brow<mbs; brow++) {
        grow = bs*(rstart + brow);
        nz   = amat->i[brow+1] - amat->i[brow];
        for (bcol=0; bcol<nz; bcol++) {
          gcol = bs*(rstart + *jj); jj++;
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              vabs = PetscAbsScalar(*v); v++;
              rsum[gcol+col] += vabs;
              /* non-diagonal block */
              if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
            }
          }
        }
      }
      /* Bmat */
      v = bmat->a; jj = bmat->j;
      for (brow=0; brow<mbs; brow++) {
        grow = bs*(rstart + brow);
        nz   = bmat->i[brow+1] - bmat->i[brow];
        for (bcol=0; bcol<nz; bcol++) {
          gcol = bs*garray[*jj]; jj++;
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              vabs = PetscAbsScalar(*v); v++;
              rsum[gcol+col] += vabs;
              rsum[grow+row] += vabs;
            }
          }
        }
      }
      ierr  = MPI_Allreduce(rsum,rsum2,mat->cmap.N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = 0.0;
      for (col=0; col<mat->cmap.N; col++) {
        if (rsum2[col] > *norm) *norm = rsum2[col];
      }
      ierr = PetscFree(rsum);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
    }
  }
  PetscFunctionReturn(0);
}
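
/*
   Why sum[0] + 2*sum[1] above (an explanatory sketch, not library
   documentation): MatNorm() on the local SeqSBAIJ block A is expected to
   account for the implicit symmetric entries of the owned diagonal block,
   while each block stored in the off-diagonal part B stands for two entries
   of the global symmetric matrix M, namely (i,j) and its mirror (j,i).
   Hence

      ||M||_F^2 = sum over processes of ( ||A_p||_F^2 + 2*||B_p||_F^2 )

   which is what the MPI_Allreduce() accumulates before taking sqrt().
*/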

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (baij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,mat->rmap.range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(0,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(0,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
  Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col,other_disassembled;
  PetscMPIInt    n;
  PetscTruth     r1,r2,r3;
  MatScalar      *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;

  if (!baij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
  }
  ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
  baij->rowvalues = 0;

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscInt          bs = mat->rmap.bs;
  PetscMPIInt       size = baij->size,rank = baij->rank;
  PetscTruth        iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
                                                rank,mat->rmap.N,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
                                                mat->rmap.bs,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    Mat_SeqSBAIJ *Aloc;
    Mat_SeqBAIJ  *Bloc;
    PetscInt     M = mat->rmap.N,N = mat->cmap.N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar    *a;

    /* Should this be the same type as mat? */
    ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(A,mat->rmap.bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqSBAIJ*)baij->A->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = mat->rmap.rstart + bs*i;
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = mat->cmap.rstart+aj[j]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Bloc = (Mat_SeqBAIJ*)baij->B->data;
    ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = mat->rmap.rstart + bs*i;
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPISBAIJ"
PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (iascii || isdraw || issocket || isbinary) {
    ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPISBAIJ"
PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap.N,mat->cmap.N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = MatDestroy(baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(baij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  if (baij->lvec)  {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
  if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
  if (baij->slvec0) {
    ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr);
    ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr);
  }
  if (baij->slvec1) {
    ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr);
    ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr);
    ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr);
  }
  if (baij->sMvctx) {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);}
  ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree(baij->hd);CHKERRQ(ierr);
#if defined(PETSC_USE_MAT_SINGLE)
  ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(baij);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ"
PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt,mbs=a->mbs,bs=A->rmap.bs;
  PetscScalar    *x,*from,zero=0.0;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap.n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  }

  /* diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
  CHKMEMQ;
  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
  CHKMEMQ;
  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
  CHKMEMQ;
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);

  CHKMEMQ;
  ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
  CHKMEMQ;
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
  CHKMEMQ;
  ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
  CHKMEMQ;
  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
  CHKMEMQ;
  PetscFunctionReturn(0);
}
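
/*
   How the single-scatter product above works (an explanatory sketch):
   writing the global symmetric matrix as M = D + U + U^T, with D the local
   SeqSBAIJ block (stored upper triangle) and U the off-diagonal SeqBAIJ
   block B, each process builds

      slvec0 = [ x_local ; B^T x_local ]   (assembled before the scatter)
      slvec1 = [ D x_local ; 0 ]           (the slvec1a / slvec1b halves)

   A single forward ADD_VALUES scatter of slvec0 into slvec1 then delivers
   both the ghost values of x (into slvec1b) and the remote B^T x
   contributions (into slvec1a), so the final

      y = B*slvec1b + slvec1a

   needs no second communication round, unlike the _2comm variant below.
*/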

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ_2comm"
PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap.n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  }
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->rmap.n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  }

  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  /* do diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  /* do superdiagonal part */
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  /* do subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPISBAIJ"
PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       mbs=a->mbs,bs=A->rmap.bs;
  PetscScalar    *x,*from,zero=0.0;

  PetscFunctionBegin;
  /*
  PetscSynchronizedPrintf(A->comm," MatMultAdd is called ...\n");
  PetscSynchronizedFlush(A->comm);
  */
  /* diagonal part */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);

  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  /* do diagonal part */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* do superdiagonal part */
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);

  /* do subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* if (a->rmap.N != a->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPISBAIJ"
PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPISBAIJ"
PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       bs = matin->rmap.bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap.rstart,brend = matin->rmap.rend;
  PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;

  PetscFunctionBegin;
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
    Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt     max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc(max*bs2*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (PetscInt*)(mat->rowvalues + max*bs2);
  }

  if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows");
  lrow = row - brstart;  /* local row index */

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPISBAIJ"
PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;

  PetscFunctionBegin;
  if (!baij->getrowactive) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
  }
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ"
PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;

  PetscFunctionBegin;
  aA->getrow_utriangular = PETSC_TRUE;
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ"
PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;

  PetscFunctionBegin;
  aA->getrow_utriangular = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRealPart_MPISBAIJ"
PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  ierr = MatRealPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatImaginaryPart_MPISBAIJ"
PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPISBAIJ"
PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPISBAIJ"
PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
  Mat            A = a->A,B = a->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = (PetscReal)matin->rmap.bs;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else {
    SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
  }
  info->rows_global       = (PetscReal)matin->rmap.N;
  info->columns_global    = (PetscReal)matin->cmap.N;
  info->rows_local        = (PetscReal)matin->rmap.n;
  info->columns_local     = (PetscReal)matin->cmap.n;
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPISBAIJ"
PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op)
{
  Mat_MPISBAIJ   *a  = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NO_NEW_NONZERO_LOCATIONS:
  case MAT_YES_NEW_NONZERO_LOCATIONS:
  case MAT_COLUMNS_UNSORTED:
  case MAT_COLUMNS_SORTED:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_KEEP_ZEROED_ROWS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROWS_SORTED:
  case MAT_ROWS_UNSORTED:
  case MAT_YES_NEW_DIAGONALS:
    ierr = PetscInfo(A,"Option ignored\n");CHKERRQ(ierr);
    break;
  case MAT_COLUMN_ORIENTED:
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_NO_NEW_DIAGONALS:
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
  case MAT_USE_HASH_TABLE:
    a->ht_flag = PETSC_TRUE;
    break;
  case MAT_NOT_SYMMETRIC:
  case MAT_NOT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
    SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_NOT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_NOT_SYMMETRY_ETERNAL:
    break;
  case MAT_IGNORE_LOWER_TRIANGULAR:
    aA->ignore_ltriangular = PETSC_TRUE;
    break;
  case MAT_ERROR_LOWER_TRIANGULAR:
    aA->ignore_ltriangular = PETSC_FALSE;
    break;
  case MAT_GETROW_UPPERTRIANGULAR:
    aA->getrow_utriangular = PETSC_TRUE;
    break;
  default:
    SETERRQ(PETSC_ERR_SUP,"unknown option");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPISBAIJ"
PetscErrorCode MatTranspose_MPISBAIJ(Mat A,Mat *B)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
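
/*
   Note on the option handling above (an explanatory sketch): the format is
   symmetric by construction, so MAT_NOT_SYMMETRIC, MAT_NOT_STRUCTURALLY_SYMMETRIC
   and MAT_HERMITIAN are rejected.  The treatment of lower triangular input
   to MatSetValues() is selected with, for example,

.vb
   MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR);  // silently drop i>j entries
.ve
*/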

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  Mat            a=baij->A, b=baij->B;
  PetscErrorCode ierr;
  PetscInt       nv,m,n;
  PetscTruth     flg;

  PetscFunctionBegin;
  if (ll != rr) {
    ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
    if (!flg) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be the same\n");
  }
  if (!ll) PetscFunctionReturn(0);

  ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
  if (m != n) SETERRQ2(PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n);

  ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
  if (nv != n) SETERRQ(PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");

  ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);

  /* left diagonalscale the off-diagonal part */
  ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);

  /* scale the diagonal part */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  /* right diagonalscale the off-diagonal part */
  ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
  ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPrintHelp_MPISBAIJ"
PetscErrorCode MatPrintHelp_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ      *a    = (Mat_MPISBAIJ*)A->data;
  MPI_Comm          comm  = A->comm;
  static PetscTruth called = PETSC_FALSE;
  PetscErrorCode    ierr;

  PetscFunctionBegin;
  if (!a->rank) {
    ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr);
  }
  if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
  ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
  ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *);

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPISBAIJ"
PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag)
{
  Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
  Mat            a,b,c,d;
  PetscTruth     flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCopy_MPISBAIJ"
PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ *)A->data;
  Mat_MPISBAIJ   *b = (Mat_MPISBAIJ *)B->data;

  PetscFunctionBegin;
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
    ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
    ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
  } else {
    ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
    ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ"
PetscErrorCode MatSetUpPreallocation_MPISBAIJ(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPISBAIJSetPreallocation(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#include "petscblaslapack.h"
#undef __FUNCT__
#define __FUNCT__ "MatAXPY_MPISBAIJ"
PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ *)X->data,*yy=(Mat_MPISBAIJ *)Y->data;
  PetscBLASInt   bnz,one=1;
  Mat_SeqSBAIJ   *xa,*ya;
  Mat_SeqBAIJ    *xb,*yb;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    xa  = (Mat_SeqSBAIJ *)xx->A->data;
    ya  = (Mat_SeqSBAIJ *)yy->A->data;
    bnz = (PetscBLASInt)xa->nz;
    BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one);
    xb  = (Mat_SeqBAIJ *)xx->B->data;
    yb  = (Mat_SeqBAIJ *)yy->B->data;
    bnz = (PetscBLASInt)xb->nz;
    BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one);
  } else {
    ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
    ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
    ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
{
  PetscErrorCode ierr;
  PetscInt       i;
  PetscTruth     flg;

  PetscFunctionBegin;
  for (i=0; i<n; i++) {
    ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
    if (!flg) {
      SETERRQ(PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices");
    }
  }
  ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}


/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = {
       MatSetValues_MPISBAIJ,
       MatGetRow_MPISBAIJ,
       MatRestoreRow_MPISBAIJ,
       MatMult_MPISBAIJ,
/* 4*/ MatMultAdd_MPISBAIJ,
       MatMult_MPISBAIJ,       /* transpose versions are same as non-transpose */
       MatMultAdd_MPISBAIJ,
       0,
       0,
       0,
/*10*/ 0,
       0,
       0,
       MatRelax_MPISBAIJ,
       MatTranspose_MPISBAIJ,
/*15*/ MatGetInfo_MPISBAIJ,
       MatEqual_MPISBAIJ,
       MatGetDiagonal_MPISBAIJ,
       MatDiagonalScale_MPISBAIJ,
       MatNorm_MPISBAIJ,
/*20*/ MatAssemblyBegin_MPISBAIJ,
       MatAssemblyEnd_MPISBAIJ,
       0,
       MatSetOption_MPISBAIJ,
       MatZeroEntries_MPISBAIJ,
/*25*/ 0,
       0,
       0,
       0,
       0,
/*30*/ MatSetUpPreallocation_MPISBAIJ,
       0,
       0,
       0,
       0,
/*35*/ MatDuplicate_MPISBAIJ,
       0,
       0,
       0,
       0,
/*40*/ MatAXPY_MPISBAIJ,
       MatGetSubMatrices_MPISBAIJ,
       MatIncreaseOverlap_MPISBAIJ,
       MatGetValues_MPISBAIJ,
       MatCopy_MPISBAIJ,
/*45*/ MatPrintHelp_MPISBAIJ,
       MatScale_MPISBAIJ,
       0,
       0,
       0,
/*50*/ 0,
       0,
       0,
       0,
       0,
/*55*/ 0,
       0,
       MatSetUnfactored_MPISBAIJ,
       0,
       MatSetValuesBlocked_MPISBAIJ,
/*60*/ 0,
       0,
       0,
       0,
       0,
/*65*/ 0,
       0,
       0,
       0,
       0,
/*70*/ MatGetRowMax_MPISBAIJ,
       0,
       0,
       0,
       0,
/*75*/ 0,
       0,
       0,
       0,
       0,
/*80*/ 0,
       0,
       0,
       0,
       MatLoad_MPISBAIJ,
/*85*/ 0,
       0,
       0,
       0,
       0,
/*90*/ 0,
       0,
       0,
       0,
       0,
/*95*/ 0,
       0,
       0,
       0,
       0,
/*100*/0,
       0,
       0,
       0,
       0,
/*105*/0,
       MatRealPart_MPISBAIJ,
       MatImaginaryPart_MPISBAIJ,
       MatGetRowUpperTriangular_MPISBAIJ,
       MatRestoreRowUpperTriangular_MPISBAIJ
};


EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
{
  PetscFunctionBegin;
  *a      = ((Mat_MPISBAIJ*)A->data)->A;
  *iscopy = PETSC_FALSE;
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
{
  Mat_MPISBAIJ   *b;
  PetscErrorCode ierr;
  PetscInt       i,mbs,Mbs;

  PetscFunctionBegin;
  ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);

  if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
  if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
  if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
  if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
  if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);

  B->rmap.bs = B->cmap.bs = bs;
  ierr = PetscMapInitialize(B->comm,&B->rmap);CHKERRQ(ierr);
  ierr = PetscMapInitialize(B->comm,&B->cmap);CHKERRQ(ierr);

  if (d_nnz) {
    for (i=0; i<B->rmap.n/bs; i++) {
      if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]);
    }
  }
  if (o_nnz) {
    for (i=0; i<B->rmap.n/bs; i++) {
      if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]);
    }
  }
  B->preallocated = PETSC_TRUE;

  b   = (Mat_MPISBAIJ*)B->data;
  mbs = B->rmap.n/bs;
  Mbs = B->rmap.N/bs;
  if (mbs*bs != B->rmap.n) {
    SETERRQ2(PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap.n,bs);
  }

  B->rmap.bs = bs;
  b->bs2     = bs*bs;
  b->mbs     = mbs;
  b->nbs     = mbs;
  b->Mbs     = Mbs;
  b->Nbs     = Mbs;

  for (i=0; i<=b->size; i++) {
    b->rangebs[i] = B->rmap.range[i]/bs;
  }
  b->rstartbs = B->rmap.rstart/bs;
  b->rendbs   = B->rmap.rend/bs;

  b->cstartbs = B->cmap.rstart/bs;
  b->cendbs   = B->cmap.rend/bs;

  ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
  ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr);
  ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
  ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
{
  Mat_MPISBAIJ   *b;
  PetscErrorCode ierr;
  PetscInt       i,mbs,Mbs;

  PetscFunctionBegin;
  ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);

  if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
  if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
  if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
  if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
  if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);

  B->rmap.bs = B->cmap.bs = bs;
  ierr = PetscMapInitialize(B->comm,&B->rmap);CHKERRQ(ierr);
  ierr = PetscMapInitialize(B->comm,&B->cmap);CHKERRQ(ierr);

  if (d_nnz) {
    for (i=0; i<B->rmap.n/bs; i++) {
      if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
    }
  }
  if (o_nnz) {
    for (i=0; i<B->rmap.n/bs; i++) {
      if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
    }
  }
  B->preallocated = PETSC_TRUE;

  b   = (Mat_MPISBAIJ*)B->data;
  mbs = B->rmap.n/bs;
  Mbs = B->rmap.N/bs;
  if (mbs*bs != B->rmap.n) {
    SETERRQ2(PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap.n,bs);
  }

  B->rmap.bs = bs;
  b->bs2     = bs*bs;
  b->mbs     = mbs;
  b->nbs     = mbs;
  b->Mbs     = Mbs;
  b->Nbs     = Mbs;

  for (i=0; i<=b->size; i++) {
    b->rangebs[i] = B->rmap.range[i]/bs;
  }
  b->rstartbs = B->rmap.rstart/bs;
  b->rendbs   = B->rmap.rend/bs;
  b->cstartbs = B->cmap.rstart/bs;
  b->cendbs   = B->cmap.rend/bs;

  ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
  ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr);
  ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
  ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);

  ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
  ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr);
  ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
  ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END

/*MC
   MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
   based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.

   Options Database Keys:
. -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPISBAIJ
M*/
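/*
   Usage sketch for the type name documented above: a matrix can be switched
   to "mpisbaij" at runtime with -mat_type mpisbaij, or programmatically
   (sizes m and M are illustrative):

.vb
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,m,M,M);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);
.ve
*/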
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPISBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPISBAIJ(Mat B)
{
  Mat_MPISBAIJ   *b;
  PetscErrorCode ierr;
  PetscTruth     flg;

  PetscFunctionBegin;
  ierr    = PetscNew(Mat_MPISBAIJ,&b);CHKERRQ(ierr);
  B->data = (void*)b;
  ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  B->ops->destroy = MatDestroy_MPISBAIJ;
  B->ops->view    = MatView_MPISBAIJ;
  B->mapping      = 0;
  B->factor       = 0;
  B->assembled    = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  ierr = PetscMalloc((b->size+2)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash  = PETSC_FALSE;
  b->colmap      = PETSC_NULL;
  b->garray      = PETSC_NULL;
  b->roworiented = PETSC_TRUE;

#if defined(PETSC_USE_MAT_SINGLE)
  /* stuff for MatSetValues_XXX in single precision */
  b->setvalueslen  = 0;
  b->setvaluescopy = PETSC_NULL;
#endif

  /* stuff used in block assembly */
  b->barray = 0;

  /* stuff used for matrix vector multiply */
  b->lvec    = 0;
  b->Mvctx   = 0;
  b->slvec0  = 0;
  b->slvec0b = 0;
  b->slvec1  = 0;
  b->slvec1a = 0;
  b->slvec1b = 0;
  b->sMvctx  = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = 0;
  b->hd           = 0;
  b->ht_size      = 0;
  b->ht_flag      = PETSC_FALSE;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  ierr = PetscOptionsHasName(B->prefix,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
  if (flg) {
    PetscReal fact = 1.39;
    ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
    ierr = PetscOptionsGetReal(B->prefix,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
    if (fact <= 1.0) fact = 1.39;
    ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
    ierr = PetscInfo1(0,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
  }
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                           "MatStoreValues_MPISBAIJ",
                                           MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                           "MatRetrieveValues_MPISBAIJ",
                                           MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
                                           "MatGetDiagonalBlock_MPISBAIJ",
                                           MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
                                           "MatMPISBAIJSetPreallocation_MPISBAIJ",
                                           MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
  B->symmetric                  = PETSC_TRUE;
  B->structurally_symmetric     = PETSC_TRUE;
  B->symmetric_set              = PETSC_TRUE;
  B->structurally_symmetric_set = PETSC_TRUE;
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*MC
   MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.

   This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
   and MATMPISBAIJ otherwise.

   Options Database Keys:
. -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
M*/
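/*
   Usage sketch: selecting the generic "sbaij" type lets MatCreate_SBAIJ()
   below dispatch to MATSEQSBAIJ or MATMPISBAIJ based on the communicator
   size, e.g. run with -mat_type sbaij, or set the type directly:

.vb
     ierr = MatSetType(A,MATSBAIJ);CHKERRQ(ierr);  /* becomes seqsbaij on one process */
.ve
*/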
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_SBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SBAIJ(Mat A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = PetscObjectChangeTypeName((PetscObject)A,MATSBAIJ);CHKERRQ(ierr);
  ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
  if (size == 1) {
    ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMPISBAIJSetPreallocation"
/*@C
   MatMPISBAIJSetPreallocation - For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on Mat

   Input Parameters:
+  A - the matrix
.  bs - size of block
.  d_nz - number of block nonzeros per block row in the diagonal portion of the local
          submatrix (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the upper triangular and diagonal part of the diagonal portion of the local
           submatrix (possibly different for each block row) or PETSC_NULL.  You must leave room
           for the diagonal entry even if it is zero.
.  o_nz - number of block nonzeros per block row in the off-diagonal portion of the local
          submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or PETSC_NULL.

   Options Database Keys:
.   -mat_no_unroll - uses code that does not unroll the loops in the
                     block calculations (much slower)
.   -mat_block_size - size of the blocks to use

   Notes:

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   If the *_nnz parameter is given then the *_nz parameter is ignored.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix.  In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          -------------------
   row 3  |  o o o d d d o o o o o o
   row 4  |  o o o d d d o o o o o o
   row 5  |  o o o d d d o o o o o o
          -------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d matrix is stored in
   MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.

   Now d_nz should indicate the number of block nonzeros per row in the upper triangular
   plus the diagonal part of the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.  For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

   Level: intermediate

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
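/*
   Usage sketch for the preallocation interface above, for a block-tridiagonal
   matrix with block size 2: each block row has at most 2 blocks in the upper
   triangular part of the diagonal portion (the diagonal block plus one
   neighbor) and at most 1 block in the off-diagonal portion.  All sizes are
   illustrative only.

.vb
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,m,M,M);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
     ierr = MatMPISBAIJSetPreallocation(A,2,2,PETSC_NULL,1,PETSC_NULL);CHKERRQ(ierr);
.ve
*/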
#undef __FUNCT__
#define __FUNCT__ "MatCreateMPISBAIJ"
/*@C
   MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
   (block compressed row).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs - size of block
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
       This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz - number of block nonzeros per block row in the diagonal portion of the local
          submatrix (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the upper triangular and diagonal part of the diagonal portion of the local
           submatrix (possibly different for each block row) or PETSC_NULL.
           You must leave room for the diagonal entry even if it is zero.
.  o_nz - number of block nonzeros per block row in the off-diagonal portion of the local
          submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or PETSC_NULL.

   Output Parameter:
.  A - the matrix

   Options Database Keys:
.   -mat_no_unroll - uses code that does not unroll the loops in the
                     block calculations (much slower)
.   -mat_block_size - size of the blocks to use
.   -mat_mpi - use the parallel matrix data structures even on one processor
               (defaults to using SeqSBAIJ format on one processor)

   Notes:
   The number of rows and columns must be divisible by blocksize.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   If the *_nnz parameter is given then the *_nz parameter is ignored.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix.  In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          -------------------
   row 3  |  o o o d d d o o o o o o
   row 4  |  o o o d d d o o o o o o
   row 5  |  o o o d d d o o o o o o
          -------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d matrix is stored in
   MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.

   Now d_nz should indicate the number of block nonzeros per row in the upper triangular
   plus the diagonal part of the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.  For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

   Level: intermediate

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPISBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
    ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
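/*
   Usage sketch for MatCreateMPISBAIJ(); sizes are illustrative only.  Each
   process owns m rows, the global size is derived, the block size is 2, and
   per-row estimates d_nz=2, o_nz=1 are used in place of exact nnz arrays:

.vb
     ierr = MatCreateMPISBAIJ(PETSC_COMM_WORLD,2,m,m,PETSC_DETERMINE,PETSC_DETERMINE,
                              2,PETSC_NULL,1,PETSC_NULL,&A);CHKERRQ(ierr);
.ve
*/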
#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPISBAIJ"
static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
  PetscErrorCode ierr;
  PetscInt       len=0,nt,bs=matin->rmap.bs,mbs=oldmat->mbs;
  PetscScalar    *array;

  PetscFunctionBegin;
  *newmat = 0;
  ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr);
  ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
  ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
  ierr = PetscMapCopy(matin->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscMapCopy(matin->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr);

  mat->factor       = matin->factor;
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;

  a = (Mat_MPISBAIJ*)mat->data;
  a->bs2 = oldmat->bs2;
  a->mbs = oldmat->mbs;
  a->nbs = oldmat->nbs;
  a->Mbs = oldmat->Mbs;
  a->Nbs = oldmat->Nbs;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;
  a->barray       = 0;
  a->rstartbs     = oldmat->rstartbs;
  a->rendbs       = oldmat->rendbs;
  a->cstartbs     = oldmat->cstartbs;
  a->cendbs       = oldmat->cendbs;

  /* hash table stuff */
  a->ht           = 0;
  a->hd           = 0;
  a->ht_size      = 0;
  a->ht_flag      = oldmat->ht_flag;
  a->ht_fact      = oldmat->ht_fact;
  a->ht_total_ct  = 0;
  a->ht_insert_ct = 0;

  ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
  ierr = MatStashCreate_Private(matin->comm,matin->rmap.bs,&mat->bstash);CHKERRQ(ierr);
  if (oldmat->colmap) {
#if defined (PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;

  if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
    ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
  } else a->garray = 0;

  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);

  ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
  ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);

  ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
  ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
  ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->slvec0b);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->slvec1a);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->slvec1b);CHKERRQ(ierr);

  /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
  ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
  a->sMvctx = oldmat->sMvctx;
  ierr = PetscLogObjectParent(mat,a->sMvctx);CHKERRQ(ierr);

  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
  ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "petscsys.h"
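/*
   Usage sketch for the duplicate routine above: MAT_COPY_VALUES clones both
   the structure and the entries, while MAT_DO_NOT_COPY_VALUES clones only
   the structure.

.vb
     Mat C;
     ierr = MatDuplicate(A,MAT_COPY_VALUES,&C);CHKERRQ(ierr);
.ve
*/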
#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPISBAIJ"
PetscErrorCode MatLoad_MPISBAIJ(PetscViewer viewer,MatType type,Mat *newmat)
{
  Mat            A;
  PetscErrorCode ierr;
  PetscInt       i,nz,j,rstart,rend;
  PetscScalar    *vals,*buf;
  MPI_Comm       comm = ((PetscObject)viewer)->comm;
  MPI_Status     status;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,*locrowlens;
  PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
  PetscInt       *procsnz = 0,jj,*mycols,*ibuf;
  PetscInt       bs=1,Mbs,mbs,extra_rows;
  PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
  PetscInt       dcount,kmax,k,nzcount,tmp;
  int            fd;

  PetscFunctionBegin;
  ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
    if (header[3] < 0) {
      SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
    }
  }

  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];

  if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");

  /*
     This code adds extra rows to make sure the number of rows is
     divisible by the blocksize
  */
  Mbs        = M/bs;
  extra_rows = bs - M + bs*Mbs;
  if (extra_rows == bs) extra_rows = 0;
  else                  Mbs++;
  if (extra_rows && !rank) {
    ierr = PetscInfo(0,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
  }

  /* determine ownership of all rows */
  mbs = Mbs/size + ((Mbs % size) > rank);
  m   = mbs*bs;
  ierr = PetscMalloc(2*(size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr);
  browners = rowners + size + 1;
  ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
  for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscMPIInt),&locrowlens);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
    ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
  }
  if (!rank) {   /* procs[0] */
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]*bs; j<rowners[i+1]*bs; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz = procsnz[0];
    ierr = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
    mycols = ibuf;
    if (size == 1) nz -= extra_rows;
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
    if (size == 1) for (i=0; i<extra_rows; i++) { mycols[nz+i] = M+i; }

    /* read in the parts for all other processors (except the last) and ship them off */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    /* read in the stuff for the last proc */
    if (size != 1) {
      nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
      ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {  /* procs[i], i>0 */
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += locrowlens[i];
    }
    ierr = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
    mycols = ibuf;
    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* loop over local rows, determining number of off-diagonal entries */
  ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
  odlens = dlens + (rend-rstart);
  ierr = PetscMalloc(3*Mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
  ierr = PetscMemzero(mask,3*Mbs*sizeof(PetscInt));CHKERRQ(ierr);
  masked1 = mask + Mbs;
  masked2 = masked1 + Mbs;
  rowcount = 0; nzcount = 0;
  for (i=0; i<mbs; i++) {
    dcount  = 0;
    odcount = 0;
    for (j=0; j<bs; j++) {
      kmax = locrowlens[rowcount];
      for (k=0; k<kmax; k++) {
        tmp = mycols[nzcount++]/bs; /* block col. index */
        if (!mask[tmp]) {
          mask[tmp] = 1;
          if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diagonal portion */
          else masked1[dcount++] = tmp;                              /* entry in diagonal portion */
        }
      }
      rowcount++;
    }

    dlens[i]  = dcount;  /* d_nzz[i] */
    odlens[i] = odcount; /* o_nzz[i] */

    /* zero out the mask elements we set */
    for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
    for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
  }

  /* create our matrix */
  ierr = MatCreate(comm,&A);CHKERRQ(ierr);
  ierr = MatSetSizes(A,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetType(A,type);CHKERRQ(ierr);
  ierr = MatMPISBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
  ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);

  if (!rank) {
    ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
    /* read in my part of the matrix numerical values */
    nz     = procsnz[0];
    vals   = buf;
    mycols = ibuf;
    if (size == 1) nz -= extra_rows;
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
    if (size == 1) for (i=0; i<extra_rows; i++) { vals[nz+i] = 1.0; }

    /* insert into matrix */
    jj = rstart*bs;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }

    /* read in other processors (except the last one) and ship out */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    /* the last proc */
    if (size != 1) {
      nz   = procsnz[size-1] - extra_rows;
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
      ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);

    /* receive message of values */
    vals   = buf;
    mycols = ibuf;
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj = rstart*bs;
    for (i=0; i<m; i++) {
      ierr = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
  }

  ierr = PetscFree(locrowlens);CHKERRQ(ierr);
  ierr = PetscFree(buf);CHKERRQ(ierr);
  ierr = PetscFree(ibuf);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = PetscFree(dlens);CHKERRQ(ierr);
  ierr = PetscFree(mask);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = A;
  PetscFunctionReturn(0);
}
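/*
   Usage sketch for the loader above; the file name is illustrative.  The
   block size of the stored matrix is supplied with -matload_block_size:

.vb
     PetscViewer viewer;
     Mat         A;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatLoad(viewer,MATMPISBAIJ,&A);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
.ve
*/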
#undef __FUNCT__
#define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
/*XXXXX@
   MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.

   Input Parameters:
+  mat  - the matrix
-  fact - factor

   Collective on Mat

   Level: advanced

   Notes:
   This can also be set by the command line option: -mat_use_hash_table fact

.keywords: matrix, hashtable, factor, HT

.seealso: MatSetOption()
@XXXXX*/

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMax_MPISBAIJ"
PetscErrorCode MatGetRowMax_MPISBAIJ(Mat A,Vec v)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
  PetscReal      atmp;
  PetscReal      *work,*svalues,*rvalues;
  PetscErrorCode ierr;
  PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
  PetscMPIInt    rank,size;
  PetscInt       *rowners_bs,dest,count,source;
  PetscScalar    *va;
  MatScalar      *ba;
  MPI_Status     stat;

  PetscFunctionBegin;
  ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);

  ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);

  bs  = A->rmap.bs;
  mbs = a->mbs;
  Mbs = a->Mbs;
  ba  = b->a;
  bi  = b->i;
  bj  = b->j;

  /* find ownerships */
  rowners_bs = A->rmap.range;

  /* each proc creates an array to be distributed */
  ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr);
  ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);

  /* row_max for B */
  if (rank != size-1) {
    for (i=0; i<mbs; i++) {
      ncols = bi[1] - bi[0]; bi++;
      brow  = bs*i;
      for (j=0; j<ncols; j++) {
        bcol = bs*(*bj);
        for (kcol=0; kcol<bs; kcol++) {
          col  = bcol + kcol;          /* local col index */
          col += rowners_bs[rank+1];   /* global col index */
          for (krow=0; krow<bs; krow++) {
            atmp = PetscAbsScalar(*ba); ba++;
            row  = brow + krow;        /* local row index */
            if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
            if (work[col] < atmp) work[col] = atmp;
          }
        }
        bj++;
      }
    }

    /* send values to their owners */
    for (dest=rank+1; dest<size; dest++) {
      svalues = work + rowners_bs[dest];
      count   = rowners_bs[dest+1] - rowners_bs[dest];
      ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,A->comm);CHKERRQ(ierr);
    }
  }

  /* receive values */
  if (rank) {
    rvalues = work;
    count   = rowners_bs[rank+1] - rowners_bs[rank];
    for (source=0; source<rank; source++) {
      ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,A->comm,&stat);CHKERRQ(ierr);
      /* process values */
      for (i=0; i<count; i++) {
        if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
      }
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = PetscFree(work);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
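/*
   Usage sketch for the row-maximum routine above: v must be a parallel
   vector whose layout matches the rows of A (local size m is illustrative).

.vb
     Vec v;
     ierr = VecCreateMPI(PETSC_COMM_WORLD,m,PETSC_DETERMINE,&v);CHKERRQ(ierr);
     ierr = MatGetRowMax(A,v);CHKERRQ(ierr);
.ve
*/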
#undef __FUNCT__
#define __FUNCT__ "MatRelax_MPISBAIJ"
PetscErrorCode MatRelax_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
  PetscErrorCode ierr;
  PetscInt       mbs=mat->mbs,bs=matin->rmap.bs;
  PetscScalar    *x,*b,*ptr,zero=0.0;
  Vec            bb1;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
  if (bs > 1) SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
    while (its--) {
      /* lower triangular part: slvec0b = - B^T*xx */
      ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);

      /* copy xx into slvec0a */
      ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
      ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
      ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
      ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);

      ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);

      /* copy bb into slvec1a */
      ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
      ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
      ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
      ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);

      /* set slvec1b = 0 */
      ierr = VecSet(mat->slvec1b,zero);CHKERRQ(ierr);

      ierr = VecScatterBegin(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
      ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
      ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);

      /* upper triangular part: bb1 = bb1 - B*x */
      ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);

      /* local diagonal sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
    ierr = VecDestroy(bb1);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRelax_MPISBAIJ_2comm"
PetscErrorCode MatRelax_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            lvec1,bb1;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
  if (matin->rmap.bs > 1) SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* lower triangular part: bb1 = bb - B^T*xx */
      ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
      ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);

      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
      ierr = VecScatterBegin(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);

      /* upper triangular part: bb1 = bb1 - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);

      ierr = VecScatterEnd(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);

      /* local diagonal sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
    ierr = VecDestroy(lvec1);CHKERRQ(ierr);
    ierr = VecDestroy(bb1);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
  }
  PetscFunctionReturn(0);
}
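/*
   Usage sketch: the relaxation kernels above are normally reached through a
   PCSOR preconditioner rather than called directly; note that only
   SOR_LOCAL_SYMMETRIC_SWEEP (processor-local SSOR) is supported here, and
   only for block size 1.

.vb
     ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
     ierr = PCSetType(pc,PCSOR);CHKERRQ(ierr);
     ierr = PCSORSetSymmetric(pc,SOR_LOCAL_SYMMETRIC_SWEEP);CHKERRQ(ierr);
.ve
*/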