1 #define PETSCMAT_DLL 2 3 #include "src/mat/impls/baij/mpi/mpibaij.h" /*I "petscmat.h" I*/ 4 #include "mpisbaij.h" 5 #include "src/mat/impls/sbaij/seq/sbaij.h" 6 7 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat); 8 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat); 9 EXTERN PetscErrorCode DisAssemble_MPISBAIJ(Mat); 10 EXTERN PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt); 11 EXTERN PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []); 12 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []); 13 EXTERN PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode); 14 EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 15 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 16 EXTERN PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**); 17 EXTERN PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**); 18 EXTERN PetscErrorCode MatPrintHelp_SeqSBAIJ(Mat); 19 EXTERN PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*); 20 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *); 21 EXTERN PetscErrorCode MatGetRowMax_MPISBAIJ(Mat,Vec); 22 EXTERN PetscErrorCode MatRelax_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec); 23 24 /* UGLY, ugly, ugly 25 When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does 26 not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and 27 inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ() 28 converts the entries into single precision and then calls ..._MatScalar() to put them 29 into the single precision data structures. 30 */ 31 #if defined(PETSC_USE_MAT_SINGLE) 32 EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode); 33 EXTERN PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode); 34 EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode); 35 EXTERN PetscErrorCode MatSetValues_MPISBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode); 36 EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode); 37 #else 38 #define MatSetValuesBlocked_SeqSBAIJ_MatScalar MatSetValuesBlocked_SeqSBAIJ 39 #define MatSetValues_MPISBAIJ_MatScalar MatSetValues_MPISBAIJ 40 #define MatSetValuesBlocked_MPISBAIJ_MatScalar MatSetValuesBlocked_MPISBAIJ 41 #define MatSetValues_MPISBAIJ_HT_MatScalar MatSetValues_MPISBAIJ_HT 42 #define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar MatSetValuesBlocked_MPISBAIJ_HT 43 #endif 44 45 EXTERN_C_BEGIN 46 #undef __FUNCT__ 47 #define __FUNCT__ "MatStoreValues_MPISBAIJ" 48 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPISBAIJ(Mat mat) 49 { 50 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 51 PetscErrorCode ierr; 52 53 PetscFunctionBegin; 54 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 55 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 56 PetscFunctionReturn(0); 57 } 58 EXTERN_C_END 59 60 EXTERN_C_BEGIN 61 #undef __FUNCT__ 62 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ" 63 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPISBAIJ(Mat mat) 64 { 65 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 66 PetscErrorCode ierr; 67 68 PetscFunctionBegin; 69 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 70 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 71 PetscFunctionReturn(0); 72 } 73 EXTERN_C_END 74 75 76 #define CHUNKSIZE 10 77 78 #define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \ 79 { \ 80 \ 81 brow = row/bs; \ 82 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 83 rmax = aimax[brow]; nrow = ailen[brow]; \ 84 bcol = col/bs; \ 85 ridx = row % bs; cidx = col % bs; \ 86 low = 0; high = nrow; \ 87 while (high-low > 3) { \ 88 t = (low+high)/2; \ 89 if (rp[t] > bcol) high = t; \ 90 else low = t; \ 91 } \ 92 for (_i=low; _i<high; _i++) { \ 93 if (rp[_i] > bcol) break; \ 94 if (rp[_i] == bcol) { \ 95 bap = ap + bs2*_i + bs*cidx + ridx; \ 96 if (addv == ADD_VALUES) *bap += value; \ 97 else *bap = value; \ 98 goto a_noinsert; \ 99 } \ 100 } \ 101 if (a->nonew == 1) goto a_noinsert; \ 102 if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 103 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew); \ 104 N = nrow++ - 1; \ 105 /* shift up all the later entries in this row */ \ 106 for (ii=N; ii>=_i; ii--) { \ 107 rp[ii+1] = rp[ii]; \ 108 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 109 } \ 110 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 111 rp[_i] = bcol; \ 112 ap[bs2*_i + bs*cidx + ridx] = value; \ 113 a_noinsert:; \ 114 ailen[brow] = nrow; \ 115 } 116 #ifndef MatSetValues_SeqBAIJ_B_Private 117 #define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \ 118 { \ 119 brow = row/bs; \ 120 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 121 rmax = bimax[brow]; nrow = bilen[brow]; \ 122 bcol = col/bs; \ 123 ridx = row % bs; cidx = col % bs; \ 124 low = 0; high = nrow; \ 125 while (high-low > 3) { \ 126 t = (low+high)/2; \ 127 if (rp[t] > bcol) high = t; \ 128 else low = t; \ 129 } \ 130 for (_i=low; _i<high; _i++) { \ 131 if (rp[_i] > bcol) break; \ 132 if (rp[_i] == bcol) { \ 133 bap = ap + bs2*_i + bs*cidx + ridx; \ 134 if (addv == ADD_VALUES) *bap += value; \ 135 else *bap = value; \ 136 goto b_noinsert; \ 137 } \ 138 } \ 139 if (b->nonew == 1) goto b_noinsert; \ 140 if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 141 MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew); \ 142 N = nrow++ - 1; \ 143 /* shift up all the later entries in this row */ \ 144 for (ii=N; ii>=_i; ii--) { \ 145 rp[ii+1] = rp[ii]; \ 146 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 147 } \ 148 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 149 rp[_i] = bcol; \ 150 ap[bs2*_i + bs*cidx + ridx] = value; \ 151 b_noinsert:; \ 152 bilen[brow] = nrow; \ 153 } 154 #endif 155 156 #if defined(PETSC_USE_MAT_SINGLE) 157 #undef __FUNCT__ 158 #define __FUNCT__ "MatSetValues_MPISBAIJ" 159 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 160 { 161 Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data; 162 PetscErrorCode ierr; 163 PetscInt i,N = m*n; 164 MatScalar *vsingle; 165 166 PetscFunctionBegin; 167 if (N > b->setvalueslen) { 168 ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr); 169 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 170 b->setvalueslen = N; 171 } 172 vsingle = b->setvaluescopy; 173 174 for (i=0; i<N; i++) { 175 vsingle[i] = v[i]; 176 } 177 ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 178 PetscFunctionReturn(0); 179 } 180 181 #undef __FUNCT__ 182 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ" 183 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 184 { 185 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 186 PetscErrorCode ierr; 187 PetscInt i,N = m*n*b->bs2; 188 MatScalar *vsingle; 189 190 PetscFunctionBegin; 191 if (N > b->setvalueslen) { 192 ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr); 193 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 194 b->setvalueslen = N; 195 } 196 vsingle = b->setvaluescopy; 197 for (i=0; i<N; i++) { 198 vsingle[i] = v[i]; 199 } 200 ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 201 PetscFunctionReturn(0); 202 } 203 #endif 204 205 /* Only add/insert a(i,j) with i<=j (blocks). 206 Any a(i,j) with i>j input by user is ingored. 207 */ 208 #undef __FUNCT__ 209 #define __FUNCT__ "MatSetValues_MPISBAIJ_MatScalar" 210 PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 211 { 212 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 213 MatScalar value; 214 PetscTruth roworiented = baij->roworiented; 215 PetscErrorCode ierr; 216 PetscInt i,j,row,col; 217 PetscInt rstart_orig=mat->rmap.rstart; 218 PetscInt rend_orig=mat->rmap.rend,cstart_orig=mat->cmap.rstart; 219 PetscInt cend_orig=mat->cmap.rend,bs=mat->rmap.bs; 220 221 /* Some Variables required in the macro */ 222 Mat A = baij->A; 223 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data; 224 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 225 MatScalar *aa=a->a; 226 227 Mat B = baij->B; 228 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 229 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 230 MatScalar *ba=b->a; 231 232 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 233 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 234 MatScalar *ap,*bap; 235 236 /* for stash */ 237 PetscInt n_loc, *in_loc = PETSC_NULL; 238 MatScalar *v_loc = PETSC_NULL; 239 240 PetscFunctionBegin; 241 242 if (!baij->donotstash){ 243 if (n > baij->n_loc) { 244 ierr = PetscFree(baij->in_loc);CHKERRQ(ierr); 245 ierr = PetscFree(baij->v_loc);CHKERRQ(ierr); 246 ierr = PetscMalloc(n*sizeof(PetscInt),&baij->in_loc);CHKERRQ(ierr); 247 ierr = PetscMalloc(n*sizeof(MatScalar),&baij->v_loc);CHKERRQ(ierr); 248 baij->n_loc = n; 249 } 250 in_loc = baij->in_loc; 251 v_loc = baij->v_loc; 252 } 253 254 for (i=0; i<m; i++) { 255 if (im[i] < 0) continue; 256 #if defined(PETSC_USE_DEBUG) 257 if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1); 258 #endif 259 if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */ 260 row = im[i] - rstart_orig; /* local row index */ 261 for (j=0; j<n; j++) { 262 if (im[i]/bs > in[j]/bs){ 263 if (a->ignore_ltriangular){ 264 continue; /* ignore lower triangular blocks */ 265 } else { 266 SETERRQ(PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR)"); 267 } 268 } 269 if (in[j] >= cstart_orig && in[j] < cend_orig){ /* diag entry (A) */ 270 col = in[j] - cstart_orig; /* local col index */ 271 brow = row/bs; bcol = col/bs; 272 if (brow > bcol) continue; /* ignore lower triangular blocks of A */ 273 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 274 MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv); 275 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 276 } else if (in[j] < 0) continue; 277 #if defined(PETSC_USE_DEBUG) 278 else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);} 279 #endif 280 else { /* off-diag entry (B) */ 281 if (mat->was_assembled) { 282 if (!baij->colmap) { 283 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 284 } 285 #if defined (PETSC_USE_CTABLE) 286 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 287 col = col - 1; 288 #else 289 col = baij->colmap[in[j]/bs] - 1; 290 #endif 291 if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) { 292 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 293 col = in[j]; 294 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 295 B = baij->B; 296 b = (Mat_SeqBAIJ*)(B)->data; 297 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 298 ba=b->a; 299 } else col += in[j]%bs; 300 } else col = in[j]; 301 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 302 MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv); 303 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 304 } 305 } 306 } else { /* off processor entry */ 307 if (!baij->donotstash) { 308 n_loc = 0; 309 for (j=0; j<n; j++){ 310 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 311 in_loc[n_loc] = in[j]; 312 if (roworiented) { 313 v_loc[n_loc] = v[i*n+j]; 314 } else { 315 v_loc[n_loc] = v[j*m+i]; 316 } 317 n_loc++; 318 } 319 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr); 320 } 321 } 322 } 323 PetscFunctionReturn(0); 324 } 325 326 #undef __FUNCT__ 327 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_MatScalar" 328 PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 329 { 330 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 331 const MatScalar *value; 332 MatScalar *barray=baij->barray; 333 PetscTruth roworiented = baij->roworiented; 334 PetscErrorCode ierr; 335 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 336 PetscInt rend=baij->rendbs,cstart=baij->rstartbs,stepval; 337 PetscInt cend=baij->rendbs,bs=mat->rmap.bs,bs2=baij->bs2; 338 339 PetscFunctionBegin; 340 if(!barray) { 341 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 342 baij->barray = barray; 343 } 344 345 if (roworiented) { 346 stepval = (n-1)*bs; 347 } else { 348 stepval = (m-1)*bs; 349 } 350 for (i=0; i<m; i++) { 351 if (im[i] < 0) continue; 352 #if defined(PETSC_USE_DEBUG) 353 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 354 #endif 355 if (im[i] >= rstart && im[i] < rend) { 356 row = im[i] - rstart; 357 for (j=0; j<n; j++) { 358 /* If NumCol = 1 then a copy is not required */ 359 if ((roworiented) && (n == 1)) { 360 barray = (MatScalar*) v + i*bs2; 361 } else if((!roworiented) && (m == 1)) { 362 barray = (MatScalar*) v + j*bs2; 363 } else { /* Here a copy is required */ 364 if (roworiented) { 365 value = v + i*(stepval+bs)*bs + j*bs; 366 } else { 367 value = v + j*(stepval+bs)*bs + i*bs; 368 } 369 for (ii=0; ii<bs; ii++,value+=stepval) { 370 for (jj=0; jj<bs; jj++) { 371 *barray++ = *value++; 372 } 373 } 374 barray -=bs2; 375 } 376 377 if (in[j] >= cstart && in[j] < cend){ 378 col = in[j] - cstart; 379 ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 380 } 381 else if (in[j] < 0) continue; 382 #if defined(PETSC_USE_DEBUG) 383 else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);} 384 #endif 385 else { 386 if (mat->was_assembled) { 387 if (!baij->colmap) { 388 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 389 } 390 391 #if defined(PETSC_USE_DEBUG) 392 #if defined (PETSC_USE_CTABLE) 393 { PetscInt data; 394 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 395 if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 396 } 397 #else 398 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 399 #endif 400 #endif 401 #if defined (PETSC_USE_CTABLE) 402 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 403 col = (col - 1)/bs; 404 #else 405 col = (baij->colmap[in[j]] - 1)/bs; 406 #endif 407 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 408 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 409 col = in[j]; 410 } 411 } 412 else col = in[j]; 413 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 414 } 415 } 416 } else { 417 if (!baij->donotstash) { 418 if (roworiented) { 419 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 420 } else { 421 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 422 } 423 } 424 } 425 } 426 PetscFunctionReturn(0); 427 } 428 429 #undef __FUNCT__ 430 #define __FUNCT__ "MatGetValues_MPISBAIJ" 431 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 432 { 433 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 434 PetscErrorCode ierr; 435 PetscInt bs=mat->rmap.bs,i,j,bsrstart = mat->rmap.rstart,bsrend = mat->rmap.rend; 436 PetscInt bscstart = mat->cmap.rstart,bscend = mat->cmap.rend,row,col,data; 437 438 PetscFunctionBegin; 439 for (i=0; i<m; i++) { 440 if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); 441 if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1); 442 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 443 row = idxm[i] - bsrstart; 444 for (j=0; j<n; j++) { 445 if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); 446 if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1); 447 if (idxn[j] >= bscstart && idxn[j] < bscend){ 448 col = idxn[j] - bscstart; 449 ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 450 } else { 451 if (!baij->colmap) { 452 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 453 } 454 #if defined (PETSC_USE_CTABLE) 455 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 456 data --; 457 #else 458 data = baij->colmap[idxn[j]/bs]-1; 459 #endif 460 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 461 else { 462 col = data + idxn[j]%bs; 463 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 464 } 465 } 466 } 467 } else { 468 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 469 } 470 } 471 PetscFunctionReturn(0); 472 } 473 474 #undef __FUNCT__ 475 #define __FUNCT__ "MatNorm_MPISBAIJ" 476 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm) 477 { 478 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 479 PetscErrorCode ierr; 480 PetscReal sum[2],*lnorm2; 481 482 PetscFunctionBegin; 483 if (baij->size == 1) { 484 ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr); 485 } else { 486 if (type == NORM_FROBENIUS) { 487 ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr); 488 ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr); 489 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++; /* squar power of norm(A) */ 490 ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr); 491 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--; /* squar power of norm(B) */ 492 ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 493 *norm = sqrt(sum[0] + 2*sum[1]); 494 ierr = PetscFree(lnorm2);CHKERRQ(ierr); 495 } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */ 496 Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data; 497 Mat_SeqBAIJ *bmat=(Mat_SeqBAIJ*)baij->B->data; 498 PetscReal *rsum,*rsum2,vabs; 499 PetscInt *jj,*garray=baij->garray,rstart=baij->rstartbs,nz; 500 PetscInt brow,bcol,col,bs=baij->A->rmap.bs,row,grow,gcol,mbs=amat->mbs; 501 MatScalar *v; 502 503 ierr = PetscMalloc((2*mat->cmap.N+1)*sizeof(PetscReal),&rsum);CHKERRQ(ierr); 504 rsum2 = rsum + mat->cmap.N; 505 ierr = PetscMemzero(rsum,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr); 506 /* Amat */ 507 v = amat->a; jj = amat->j; 508 for (brow=0; brow<mbs; brow++) { 509 grow = bs*(rstart + brow); 510 nz = amat->i[brow+1] - amat->i[brow]; 511 for (bcol=0; bcol<nz; bcol++){ 512 gcol = bs*(rstart + *jj); jj++; 513 for (col=0; col<bs; col++){ 514 for (row=0; row<bs; row++){ 515 vabs = PetscAbsScalar(*v); v++; 516 rsum[gcol+col] += vabs; 517 /* non-diagonal block */ 518 if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs; 519 } 520 } 521 } 522 } 523 /* Bmat */ 524 v = bmat->a; jj = bmat->j; 525 for (brow=0; brow<mbs; brow++) { 526 grow = bs*(rstart + brow); 527 nz = bmat->i[brow+1] - bmat->i[brow]; 528 for (bcol=0; bcol<nz; bcol++){ 529 gcol = bs*garray[*jj]; jj++; 530 for (col=0; col<bs; col++){ 531 for (row=0; row<bs; row++){ 532 vabs = PetscAbsScalar(*v); v++; 533 rsum[gcol+col] += vabs; 534 rsum[grow+row] += vabs; 535 } 536 } 537 } 538 } 539 ierr = MPI_Allreduce(rsum,rsum2,mat->cmap.N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 540 *norm = 0.0; 541 for (col=0; col<mat->cmap.N; col++) { 542 if (rsum2[col] > *norm) *norm = rsum2[col]; 543 } 544 ierr = PetscFree(rsum);CHKERRQ(ierr); 545 } else { 546 SETERRQ(PETSC_ERR_SUP,"No support for this norm yet"); 547 } 548 } 549 PetscFunctionReturn(0); 550 } 551 552 #undef __FUNCT__ 553 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ" 554 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode) 555 { 556 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 557 PetscErrorCode ierr; 558 PetscInt nstash,reallocs; 559 InsertMode addv; 560 561 PetscFunctionBegin; 562 if (baij->donotstash) { 563 PetscFunctionReturn(0); 564 } 565 566 /* make sure all processors are either in INSERTMODE or ADDMODE */ 567 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 568 if (addv == (ADD_VALUES|INSERT_VALUES)) { 569 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 570 } 571 mat->insertmode = addv; /* in case this processor had no cache */ 572 573 ierr = MatStashScatterBegin_Private(&mat->stash,mat->rmap.range);CHKERRQ(ierr); 574 ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rangebs);CHKERRQ(ierr); 575 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 576 ierr = PetscInfo2(0,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 577 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 578 ierr = PetscInfo2(0,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 579 PetscFunctionReturn(0); 580 } 581 582 #undef __FUNCT__ 583 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ" 584 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode) 585 { 586 Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data; 587 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)baij->A->data; 588 PetscErrorCode ierr; 589 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 590 PetscInt *row,*col,other_disassembled; 591 PetscMPIInt n; 592 PetscTruth r1,r2,r3; 593 MatScalar *val; 594 InsertMode addv = mat->insertmode; 595 596 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 597 PetscFunctionBegin; 598 599 if (!baij->donotstash) { 600 while (1) { 601 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 602 if (!flg) break; 603 604 for (i=0; i<n;) { 605 /* Now identify the consecutive vals belonging to the same row */ 606 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 607 if (j < n) ncols = j-i; 608 else ncols = n-i; 609 /* Now assemble all these values with a single function call */ 610 ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 611 i = j; 612 } 613 } 614 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 615 /* Now process the block-stash. Since the values are stashed column-oriented, 616 set the roworiented flag to column oriented, and after MatSetValues() 617 restore the original flags */ 618 r1 = baij->roworiented; 619 r2 = a->roworiented; 620 r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented; 621 baij->roworiented = PETSC_FALSE; 622 a->roworiented = PETSC_FALSE; 623 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworinted */ 624 while (1) { 625 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 626 if (!flg) break; 627 628 for (i=0; i<n;) { 629 /* Now identify the consecutive vals belonging to the same row */ 630 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 631 if (j < n) ncols = j-i; 632 else ncols = n-i; 633 ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 634 i = j; 635 } 636 } 637 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 638 baij->roworiented = r1; 639 a->roworiented = r2; 640 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworinted */ 641 } 642 643 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 644 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 645 646 /* determine if any processor has disassembled, if so we must 647 also disassemble ourselfs, in order that we may reassemble. */ 648 /* 649 if nonzero structure of submatrix B cannot change then we know that 650 no processor disassembled thus we can skip this stuff 651 */ 652 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 653 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 654 if (mat->was_assembled && !other_disassembled) { 655 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 656 } 657 } 658 659 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 660 ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */ 661 } 662 ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */ 663 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 664 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 665 666 ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr); 667 baij->rowvalues = 0; 668 669 PetscFunctionReturn(0); 670 } 671 672 #undef __FUNCT__ 673 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket" 674 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 675 { 676 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 677 PetscErrorCode ierr; 678 PetscInt bs = mat->rmap.bs; 679 PetscMPIInt size = baij->size,rank = baij->rank; 680 PetscTruth iascii,isdraw; 681 PetscViewer sviewer; 682 PetscViewerFormat format; 683 684 PetscFunctionBegin; 685 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 686 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 687 if (iascii) { 688 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 689 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 690 MatInfo info; 691 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 692 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 693 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n", 694 rank,mat->rmap.N,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs, 695 mat->rmap.bs,(PetscInt)info.memory);CHKERRQ(ierr); 696 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 697 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr); 698 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 699 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr); 700 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 701 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 702 PetscFunctionReturn(0); 703 } else if (format == PETSC_VIEWER_ASCII_INFO) { 704 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 705 PetscFunctionReturn(0); 706 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 707 PetscFunctionReturn(0); 708 } 709 } 710 711 if (isdraw) { 712 PetscDraw draw; 713 PetscTruth isnull; 714 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 715 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 716 } 717 718 if (size == 1) { 719 ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr); 720 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 721 } else { 722 /* assemble the entire matrix onto first processor. */ 723 Mat A; 724 Mat_SeqSBAIJ *Aloc; 725 Mat_SeqBAIJ *Bloc; 726 PetscInt M = mat->rmap.N,N = mat->cmap.N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 727 MatScalar *a; 728 729 /* Should this be the same type as mat? */ 730 ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr); 731 if (!rank) { 732 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 733 } else { 734 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 735 } 736 ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 737 ierr = MatMPISBAIJSetPreallocation(A,mat->rmap.bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 738 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 739 740 /* copy over the A part */ 741 Aloc = (Mat_SeqSBAIJ*)baij->A->data; 742 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 743 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 744 745 for (i=0; i<mbs; i++) { 746 rvals[0] = bs*(baij->rstartbs + i); 747 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 748 for (j=ai[i]; j<ai[i+1]; j++) { 749 col = (baij->cstartbs+aj[j])*bs; 750 for (k=0; k<bs; k++) { 751 ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 752 col++; a += bs; 753 } 754 } 755 } 756 /* copy over the B part */ 757 Bloc = (Mat_SeqBAIJ*)baij->B->data; 758 ai = Bloc->i; aj = Bloc->j; a = Bloc->a; 759 for (i=0; i<mbs; i++) { 760 761 rvals[0] = bs*(baij->rstartbs + i); 762 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 763 for (j=ai[i]; j<ai[i+1]; j++) { 764 col = baij->garray[aj[j]]*bs; 765 for (k=0; k<bs; k++) { 766 ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 767 col++; a += bs; 768 } 769 } 770 } 771 ierr = PetscFree(rvals);CHKERRQ(ierr); 772 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 773 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 774 /* 775 Everyone has to call to draw the matrix since the graphics waits are 776 synchronized across all processors that share the PetscDraw object 777 */ 778 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 779 if (!rank) { 780 ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 781 ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 782 } 783 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 784 ierr = MatDestroy(A);CHKERRQ(ierr); 785 } 786 PetscFunctionReturn(0); 787 } 788 789 #undef __FUNCT__ 790 #define __FUNCT__ "MatView_MPISBAIJ" 791 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer) 792 { 793 PetscErrorCode ierr; 794 PetscTruth iascii,isdraw,issocket,isbinary; 795 796 PetscFunctionBegin; 797 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 798 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 799 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 800 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 801 if (iascii || isdraw || issocket || isbinary) { 802 ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 803 } else { 804 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name); 805 } 806 PetscFunctionReturn(0); 807 } 808 809 #undef __FUNCT__ 810 #define __FUNCT__ "MatDestroy_MPISBAIJ" 811 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat) 812 { 813 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 814 PetscErrorCode ierr; 815 816 PetscFunctionBegin; 817 #if defined(PETSC_USE_LOG) 818 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap.N,mat->cmap.N); 819 #endif 820 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 821 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 822 ierr = MatDestroy(baij->A);CHKERRQ(ierr); 823 ierr = MatDestroy(baij->B);CHKERRQ(ierr); 824 #if defined (PETSC_USE_CTABLE) 825 if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);} 826 #else 827 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 828 #endif 829 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 830 if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 831 if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 832 if (baij->slvec0) { 833 ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr); 834 ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr); 835 } 836 if (baij->slvec1) { 837 ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr); 838 ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr); 839 ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr); 840 } 841 if (baij->sMvctx) {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);} 842 ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr); 843 ierr = PetscFree(baij->barray);CHKERRQ(ierr); 844 ierr = PetscFree(baij->hd);CHKERRQ(ierr); 845 #if defined(PETSC_USE_MAT_SINGLE) 846 ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr); 847 #endif 848 ierr = PetscFree(baij->in_loc);CHKERRQ(ierr); 849 ierr = PetscFree(baij->v_loc);CHKERRQ(ierr); 850 ierr = PetscFree(baij->rangebs);CHKERRQ(ierr); 851 ierr = PetscFree(baij);CHKERRQ(ierr); 852 853 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 854 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 855 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 856 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 857 PetscFunctionReturn(0); 858 } 859 860 #undef __FUNCT__ 861 #define __FUNCT__ "MatMult_MPISBAIJ" 862 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy) 863 { 864 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 865 PetscErrorCode ierr; 866 PetscInt nt,mbs=a->mbs,bs=A->rmap.bs; 867 PetscScalar *x,*from,zero=0.0; 868 869 PetscFunctionBegin; 870 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 871 if (nt != A->cmap.n) { 872 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 873 } 874 875 /* diagonal part */ 876 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 877 ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr); 878 879 /* subdiagonal part */ 880 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 881 CHKMEMQ; 882 /* copy x into the vec slvec0 */ 883 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 884 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 885 CHKMEMQ; 886 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 887 CHKMEMQ; 888 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 889 890 CHKMEMQ; 891 ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr); 892 CHKMEMQ; 893 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 894 CHKMEMQ; 895 ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr); 896 CHKMEMQ; 897 /* supperdiagonal part */ 898 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 899 CHKMEMQ; 900 PetscFunctionReturn(0); 901 } 902 903 #undef __FUNCT__ 904 #define __FUNCT__ "MatMult_MPISBAIJ_2comm" 905 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy) 906 { 907 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 908 PetscErrorCode ierr; 909 PetscInt nt; 910 911 PetscFunctionBegin; 912 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 913 if (nt != A->cmap.n) { 914 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 915 } 916 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 917 if (nt != A->rmap.N) { 918 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 919 } 920 921 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 922 /* do diagonal part */ 923 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 924 /* do supperdiagonal part */ 925 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 926 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 927 /* do subdiagonal part */ 928 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 929 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 930 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 931 932 PetscFunctionReturn(0); 933 } 934 935 #undef __FUNCT__ 936 #define __FUNCT__ "MatMultAdd_MPISBAIJ" 937 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 938 { 939 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 940 PetscErrorCode ierr; 941 PetscInt mbs=a->mbs,bs=A->rmap.bs; 942 PetscScalar *x,*from,zero=0.0; 943 944 PetscFunctionBegin; 945 /* 946 PetscSynchronizedPrintf(A->comm," MatMultAdd is called ...\n"); 947 PetscSynchronizedFlush(A->comm); 948 */ 949 /* diagonal part */ 950 ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr); 951 ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr); 952 953 /* subdiagonal part */ 954 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 955 956 /* copy x into the vec slvec0 */ 957 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 958 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 959 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 960 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 961 962 ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr); 963 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 964 ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr); 965 966 /* supperdiagonal part */ 967 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr); 968 969 PetscFunctionReturn(0); 970 } 971 972 #undef __FUNCT__ 973 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm" 974 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz) 975 { 976 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 977 PetscErrorCode ierr; 978 979 PetscFunctionBegin; 980 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 981 /* do diagonal part */ 982 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 983 /* do supperdiagonal part */ 984 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 985 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 986 987 /* do subdiagonal part */ 988 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 989 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 990 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 991 992 PetscFunctionReturn(0); 993 } 994 995 /* 996 This only works correctly for square matrices where the subblock A->A is the 997 diagonal block 998 */ 999 #undef __FUNCT__ 1000 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ" 1001 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v) 1002 { 1003 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1004 PetscErrorCode ierr; 1005 1006 PetscFunctionBegin; 1007 /* if (a->rmap.N != a->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */ 1008 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1009 PetscFunctionReturn(0); 1010 } 1011 1012 #undef __FUNCT__ 1013 #define __FUNCT__ "MatScale_MPISBAIJ" 1014 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa) 1015 { 1016 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1017 PetscErrorCode ierr; 1018 1019 PetscFunctionBegin; 1020 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1021 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1022 PetscFunctionReturn(0); 1023 } 1024 1025 #undef __FUNCT__ 1026 #define __FUNCT__ "MatGetRow_MPISBAIJ" 1027 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1028 { 1029 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 1030 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1031 PetscErrorCode ierr; 1032 PetscInt bs = matin->rmap.bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1033 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap.rstart,brend = matin->rmap.rend; 1034 PetscInt *cmap,*idx_p,cstart = mat->rstartbs; 1035 1036 PetscFunctionBegin; 1037 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1038 mat->getrowactive = PETSC_TRUE; 1039 1040 if (!mat->rowvalues && (idx || v)) { 1041 /* 1042 allocate enough space to hold information from the longest row. 1043 */ 1044 Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data; 1045 Mat_SeqBAIJ *Ba = (Mat_SeqBAIJ*)mat->B->data; 1046 PetscInt max = 1,mbs = mat->mbs,tmp; 1047 for (i=0; i<mbs; i++) { 1048 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */ 1049 if (max < tmp) { max = tmp; } 1050 } 1051 ierr = PetscMalloc(max*bs2*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1052 mat->rowindices = (PetscInt*)(mat->rowvalues + max*bs2); 1053 } 1054 1055 if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows") 1056 lrow = row - brstart; /* local row index */ 1057 1058 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1059 if (!v) {pvA = 0; pvB = 0;} 1060 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1061 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1062 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1063 nztot = nzA + nzB; 1064 1065 cmap = mat->garray; 1066 if (v || idx) { 1067 if (nztot) { 1068 /* Sort by increasing column numbers, assuming A and B already sorted */ 1069 PetscInt imark = -1; 1070 if (v) { 1071 *v = v_p = mat->rowvalues; 1072 for (i=0; i<nzB; i++) { 1073 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1074 else break; 1075 } 1076 imark = i; 1077 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1078 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1079 } 1080 if (idx) { 1081 *idx = idx_p = mat->rowindices; 1082 if (imark > -1) { 1083 for (i=0; i<imark; i++) { 1084 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1085 } 1086 } else { 1087 for (i=0; i<nzB; i++) { 1088 if (cmap[cworkB[i]/bs] < cstart) 1089 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1090 else break; 1091 } 1092 imark = i; 1093 } 1094 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1095 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1096 } 1097 } else { 1098 if (idx) *idx = 0; 1099 if (v) *v = 0; 1100 } 1101 } 1102 *nz = nztot; 1103 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1104 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1105 PetscFunctionReturn(0); 1106 } 1107 1108 #undef __FUNCT__ 1109 #define __FUNCT__ "MatRestoreRow_MPISBAIJ" 1110 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1111 { 1112 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1113 1114 PetscFunctionBegin; 1115 if (!baij->getrowactive) { 1116 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1117 } 1118 baij->getrowactive = PETSC_FALSE; 1119 PetscFunctionReturn(0); 1120 } 1121 1122 #undef __FUNCT__ 1123 #define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ" 1124 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A) 1125 { 1126 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1127 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1128 1129 PetscFunctionBegin; 1130 aA->getrow_utriangular = PETSC_TRUE; 1131 PetscFunctionReturn(0); 1132 } 1133 #undef __FUNCT__ 1134 #define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ" 1135 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A) 1136 { 1137 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1138 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1139 1140 PetscFunctionBegin; 1141 aA->getrow_utriangular = PETSC_FALSE; 1142 PetscFunctionReturn(0); 1143 } 1144 1145 #undef __FUNCT__ 1146 #define __FUNCT__ "MatRealPart_MPISBAIJ" 1147 PetscErrorCode MatRealPart_MPISBAIJ(Mat A) 1148 { 1149 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1150 PetscErrorCode ierr; 1151 1152 PetscFunctionBegin; 1153 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1154 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1155 PetscFunctionReturn(0); 1156 } 1157 1158 #undef __FUNCT__ 1159 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ" 1160 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A) 1161 { 1162 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1163 PetscErrorCode ierr; 1164 1165 PetscFunctionBegin; 1166 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1167 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1168 PetscFunctionReturn(0); 1169 } 1170 1171 #undef __FUNCT__ 1172 #define __FUNCT__ "MatZeroEntries_MPISBAIJ" 1173 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A) 1174 { 1175 Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data; 1176 PetscErrorCode ierr; 1177 1178 PetscFunctionBegin; 1179 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1180 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1181 PetscFunctionReturn(0); 1182 } 1183 1184 #undef __FUNCT__ 1185 #define __FUNCT__ "MatGetInfo_MPISBAIJ" 1186 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1187 { 1188 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data; 1189 Mat A = a->A,B = a->B; 1190 PetscErrorCode ierr; 1191 PetscReal isend[5],irecv[5]; 1192 1193 PetscFunctionBegin; 1194 info->block_size = (PetscReal)matin->rmap.bs; 1195 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1196 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1197 isend[3] = info->memory; isend[4] = info->mallocs; 1198 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1199 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1200 isend[3] += info->memory; isend[4] += info->mallocs; 1201 if (flag == MAT_LOCAL) { 1202 info->nz_used = isend[0]; 1203 info->nz_allocated = isend[1]; 1204 info->nz_unneeded = isend[2]; 1205 info->memory = isend[3]; 1206 info->mallocs = isend[4]; 1207 } else if (flag == MAT_GLOBAL_MAX) { 1208 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1209 info->nz_used = irecv[0]; 1210 info->nz_allocated = irecv[1]; 1211 info->nz_unneeded = irecv[2]; 1212 info->memory = irecv[3]; 1213 info->mallocs = irecv[4]; 1214 } else if (flag == MAT_GLOBAL_SUM) { 1215 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1216 info->nz_used = irecv[0]; 1217 info->nz_allocated = irecv[1]; 1218 info->nz_unneeded = irecv[2]; 1219 info->memory = irecv[3]; 1220 info->mallocs = irecv[4]; 1221 } else { 1222 SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1223 } 1224 info->rows_global = (PetscReal)A->rmap.N; 1225 info->columns_global = (PetscReal)A->cmap.N; 1226 info->rows_local = (PetscReal)A->rmap.N; 1227 info->columns_local = (PetscReal)A->cmap.N; 1228 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1229 info->fill_ratio_needed = 0; 1230 info->factor_mallocs = 0; 1231 PetscFunctionReturn(0); 1232 } 1233 1234 #undef __FUNCT__ 1235 #define __FUNCT__ "MatSetOption_MPISBAIJ" 1236 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op) 1237 { 1238 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1239 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1240 PetscErrorCode ierr; 1241 1242 PetscFunctionBegin; 1243 switch (op) { 1244 case MAT_NO_NEW_NONZERO_LOCATIONS: 1245 case MAT_YES_NEW_NONZERO_LOCATIONS: 1246 case MAT_COLUMNS_UNSORTED: 1247 case MAT_COLUMNS_SORTED: 1248 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1249 case MAT_KEEP_ZEROED_ROWS: 1250 case MAT_NEW_NONZERO_LOCATION_ERR: 1251 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1252 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1253 break; 1254 case MAT_ROW_ORIENTED: 1255 a->roworiented = PETSC_TRUE; 1256 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1257 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1258 break; 1259 case MAT_ROWS_SORTED: 1260 case MAT_ROWS_UNSORTED: 1261 case MAT_YES_NEW_DIAGONALS: 1262 ierr = PetscInfo1(A,"Option %d ignored\n",op);CHKERRQ(ierr); 1263 break; 1264 case MAT_COLUMN_ORIENTED: 1265 a->roworiented = PETSC_FALSE; 1266 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1267 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1268 break; 1269 case MAT_IGNORE_OFF_PROC_ENTRIES: 1270 a->donotstash = PETSC_TRUE; 1271 break; 1272 case MAT_NO_NEW_DIAGONALS: 1273 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1274 case MAT_USE_HASH_TABLE: 1275 a->ht_flag = PETSC_TRUE; 1276 break; 1277 case MAT_NOT_SYMMETRIC: 1278 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 1279 case MAT_HERMITIAN: 1280 SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric"); 1281 case MAT_SYMMETRIC: 1282 case MAT_STRUCTURALLY_SYMMETRIC: 1283 case MAT_NOT_HERMITIAN: 1284 case MAT_SYMMETRY_ETERNAL: 1285 case MAT_NOT_SYMMETRY_ETERNAL: 1286 break; 1287 case MAT_IGNORE_LOWER_TRIANGULAR: 1288 aA->ignore_ltriangular = PETSC_TRUE; 1289 break; 1290 case MAT_ERROR_LOWER_TRIANGULAR: 1291 aA->ignore_ltriangular = PETSC_FALSE; 1292 break; 1293 case MAT_GETROW_UPPERTRIANGULAR: 1294 aA->getrow_utriangular = PETSC_TRUE; 1295 break; 1296 default: 1297 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 1298 } 1299 PetscFunctionReturn(0); 1300 } 1301 1302 #undef __FUNCT__ 1303 #define __FUNCT__ "MatTranspose_MPISBAIJ" 1304 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,Mat *B) 1305 { 1306 PetscErrorCode ierr; 1307 PetscFunctionBegin; 1308 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 1309 PetscFunctionReturn(0); 1310 } 1311 1312 #undef __FUNCT__ 1313 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ" 1314 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr) 1315 { 1316 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1317 Mat a=baij->A, b=baij->B; 1318 PetscErrorCode ierr; 1319 PetscInt nv,m,n; 1320 PetscTruth flg; 1321 1322 PetscFunctionBegin; 1323 if (ll != rr){ 1324 ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr); 1325 if (!flg) 1326 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n"); 1327 } 1328 if (!ll) PetscFunctionReturn(0); 1329 1330 ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr); 1331 if (m != n) SETERRQ2(PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n); 1332 1333 ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr); 1334 if (nv!=n) SETERRQ(PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size"); 1335 1336 ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1337 1338 /* left diagonalscale the off-diagonal part */ 1339 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1340 1341 /* scale the diagonal part */ 1342 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1343 1344 /* right diagonalscale the off-diagonal part */ 1345 ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1346 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1347 PetscFunctionReturn(0); 1348 } 1349 1350 #undef __FUNCT__ 1351 #define __FUNCT__ "MatPrintHelp_MPISBAIJ" 1352 PetscErrorCode MatPrintHelp_MPISBAIJ(Mat A) 1353 { 1354 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1355 MPI_Comm comm = A->comm; 1356 static PetscTruth called = PETSC_FALSE; 1357 PetscErrorCode ierr; 1358 1359 PetscFunctionBegin; 1360 if (!a->rank) { 1361 ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr); 1362 } 1363 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1364 ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr); 1365 ierr = (*PetscHelpPrintf)(comm," -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr); 1366 PetscFunctionReturn(0); 1367 } 1368 1369 #undef __FUNCT__ 1370 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ" 1371 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A) 1372 { 1373 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1374 PetscErrorCode ierr; 1375 1376 PetscFunctionBegin; 1377 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1378 PetscFunctionReturn(0); 1379 } 1380 1381 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *); 1382 1383 #undef __FUNCT__ 1384 #define __FUNCT__ "MatEqual_MPISBAIJ" 1385 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag) 1386 { 1387 Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data; 1388 Mat a,b,c,d; 1389 PetscTruth flg; 1390 PetscErrorCode ierr; 1391 1392 PetscFunctionBegin; 1393 a = matA->A; b = matA->B; 1394 c = matB->A; d = matB->B; 1395 1396 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1397 if (flg) { 1398 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1399 } 1400 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1401 PetscFunctionReturn(0); 1402 } 1403 1404 #undef __FUNCT__ 1405 #define __FUNCT__ "MatCopy_MPISBAIJ" 1406 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str) 1407 { 1408 PetscErrorCode ierr; 1409 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1410 Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data; 1411 1412 PetscFunctionBegin; 1413 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1414 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1415 ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr); 1416 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1417 ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr); 1418 } else { 1419 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1420 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1421 } 1422 PetscFunctionReturn(0); 1423 } 1424 1425 #undef __FUNCT__ 1426 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ" 1427 PetscErrorCode MatSetUpPreallocation_MPISBAIJ(Mat A) 1428 { 1429 PetscErrorCode ierr; 1430 1431 PetscFunctionBegin; 1432 ierr = MatMPISBAIJSetPreallocation(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1433 PetscFunctionReturn(0); 1434 } 1435 1436 #include "petscblaslapack.h" 1437 #undef __FUNCT__ 1438 #define __FUNCT__ "MatAXPY_MPISBAIJ" 1439 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1440 { 1441 PetscErrorCode ierr; 1442 Mat_MPISBAIJ *xx=(Mat_MPISBAIJ *)X->data,*yy=(Mat_MPISBAIJ *)Y->data; 1443 PetscBLASInt bnz,one=1; 1444 Mat_SeqSBAIJ *xa,*ya; 1445 Mat_SeqBAIJ *xb,*yb; 1446 1447 PetscFunctionBegin; 1448 if (str == SAME_NONZERO_PATTERN) { 1449 PetscScalar alpha = a; 1450 xa = (Mat_SeqSBAIJ *)xx->A->data; 1451 ya = (Mat_SeqSBAIJ *)yy->A->data; 1452 bnz = (PetscBLASInt)xa->nz; 1453 BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one); 1454 xb = (Mat_SeqBAIJ *)xx->B->data; 1455 yb = (Mat_SeqBAIJ *)yy->B->data; 1456 bnz = (PetscBLASInt)xb->nz; 1457 BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one); 1458 } else { 1459 ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr); 1460 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1461 ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr); 1462 } 1463 PetscFunctionReturn(0); 1464 } 1465 1466 #undef __FUNCT__ 1467 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ" 1468 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[]) 1469 { 1470 PetscErrorCode ierr; 1471 PetscInt i; 1472 PetscTruth flg; 1473 1474 PetscFunctionBegin; 1475 for (i=0; i<n; i++) { 1476 ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr); 1477 if (!flg) { 1478 SETERRQ(PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices"); 1479 } 1480 } 1481 ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr); 1482 PetscFunctionReturn(0); 1483 } 1484 1485 1486 /* -------------------------------------------------------------------*/ 1487 static struct _MatOps MatOps_Values = { 1488 MatSetValues_MPISBAIJ, 1489 MatGetRow_MPISBAIJ, 1490 MatRestoreRow_MPISBAIJ, 1491 MatMult_MPISBAIJ, 1492 /* 4*/ MatMultAdd_MPISBAIJ, 1493 MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */ 1494 MatMultAdd_MPISBAIJ, 1495 0, 1496 0, 1497 0, 1498 /*10*/ 0, 1499 0, 1500 0, 1501 MatRelax_MPISBAIJ, 1502 MatTranspose_MPISBAIJ, 1503 /*15*/ MatGetInfo_MPISBAIJ, 1504 MatEqual_MPISBAIJ, 1505 MatGetDiagonal_MPISBAIJ, 1506 MatDiagonalScale_MPISBAIJ, 1507 MatNorm_MPISBAIJ, 1508 /*20*/ MatAssemblyBegin_MPISBAIJ, 1509 MatAssemblyEnd_MPISBAIJ, 1510 0, 1511 MatSetOption_MPISBAIJ, 1512 MatZeroEntries_MPISBAIJ, 1513 /*25*/ 0, 1514 0, 1515 0, 1516 0, 1517 0, 1518 /*30*/ MatSetUpPreallocation_MPISBAIJ, 1519 0, 1520 0, 1521 0, 1522 0, 1523 /*35*/ MatDuplicate_MPISBAIJ, 1524 0, 1525 0, 1526 0, 1527 0, 1528 /*40*/ MatAXPY_MPISBAIJ, 1529 MatGetSubMatrices_MPISBAIJ, 1530 MatIncreaseOverlap_MPISBAIJ, 1531 MatGetValues_MPISBAIJ, 1532 MatCopy_MPISBAIJ, 1533 /*45*/ MatPrintHelp_MPISBAIJ, 1534 MatScale_MPISBAIJ, 1535 0, 1536 0, 1537 0, 1538 /*50*/ 0, 1539 0, 1540 0, 1541 0, 1542 0, 1543 /*55*/ 0, 1544 0, 1545 MatSetUnfactored_MPISBAIJ, 1546 0, 1547 MatSetValuesBlocked_MPISBAIJ, 1548 /*60*/ 0, 1549 0, 1550 0, 1551 0, 1552 0, 1553 /*65*/ 0, 1554 0, 1555 0, 1556 0, 1557 0, 1558 /*70*/ MatGetRowMax_MPISBAIJ, 1559 0, 1560 0, 1561 0, 1562 0, 1563 /*75*/ 0, 1564 0, 1565 0, 1566 0, 1567 0, 1568 /*80*/ 0, 1569 0, 1570 0, 1571 0, 1572 MatLoad_MPISBAIJ, 1573 /*85*/ 0, 1574 0, 1575 0, 1576 0, 1577 0, 1578 /*90*/ 0, 1579 0, 1580 0, 1581 0, 1582 0, 1583 /*95*/ 0, 1584 0, 1585 0, 1586 0, 1587 0, 1588 /*100*/0, 1589 0, 1590 0, 1591 0, 1592 0, 1593 /*105*/0, 1594 MatRealPart_MPISBAIJ, 1595 MatImaginaryPart_MPISBAIJ, 1596 MatGetRowUpperTriangular_MPISBAIJ, 1597 MatRestoreRowUpperTriangular_MPISBAIJ 1598 }; 1599 1600 1601 EXTERN_C_BEGIN 1602 #undef __FUNCT__ 1603 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ" 1604 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 1605 { 1606 PetscFunctionBegin; 1607 *a = ((Mat_MPISBAIJ *)A->data)->A; 1608 *iscopy = PETSC_FALSE; 1609 PetscFunctionReturn(0); 1610 } 1611 EXTERN_C_END 1612 1613 EXTERN_C_BEGIN 1614 #undef __FUNCT__ 1615 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ" 1616 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz) 1617 { 1618 Mat_MPISBAIJ *b; 1619 PetscErrorCode ierr; 1620 PetscInt i,mbs,Mbs; 1621 1622 PetscFunctionBegin; 1623 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1624 1625 if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive"); 1626 if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3; 1627 if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1; 1628 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 1629 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 1630 1631 B->rmap.bs = B->cmap.bs = bs; 1632 ierr = PetscMapInitialize(B->comm,&B->rmap);CHKERRQ(ierr); 1633 ierr = PetscMapInitialize(B->comm,&B->cmap);CHKERRQ(ierr); 1634 1635 if (d_nnz) { 1636 for (i=0; i<B->rmap.n/bs; i++) { 1637 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 1638 } 1639 } 1640 if (o_nnz) { 1641 for (i=0; i<B->rmap.n/bs; i++) { 1642 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 1643 } 1644 } 1645 B->preallocated = PETSC_TRUE; 1646 1647 b = (Mat_MPISBAIJ*)B->data; 1648 mbs = B->rmap.n/bs; 1649 Mbs = B->rmap.N/bs; 1650 if (mbs*bs != B->rmap.n) { 1651 SETERRQ2(PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap.N,bs); 1652 } 1653 1654 B->rmap.bs = bs; 1655 b->bs2 = bs*bs; 1656 b->mbs = mbs; 1657 b->nbs = mbs; 1658 b->Mbs = Mbs; 1659 b->Nbs = Mbs; 1660 1661 for (i=0; i<=b->size; i++) { 1662 b->rangebs[i] = B->rmap.range[i]/bs; 1663 } 1664 b->rstartbs = B->rmap.rstart/bs; 1665 b->rendbs = B->rmap.rend/bs; 1666 1667 b->cstartbs = B->cmap.rstart/bs; 1668 b->cendbs = B->cmap.rend/bs; 1669 1670 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 1671 ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr); 1672 ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr); 1673 ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 1674 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 1675 1676 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 1677 ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr); 1678 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 1679 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 1680 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 1681 1682 /* build cache for off array entries formed */ 1683 ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr); 1684 1685 PetscFunctionReturn(0); 1686 } 1687 EXTERN_C_END 1688 1689 /*MC 1690 MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices, 1691 based on block compressed sparse row format. Only the upper triangular portion of the matrix is stored. 1692 1693 Options Database Keys: 1694 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions() 1695 1696 Level: beginner 1697 1698 .seealso: MatCreateMPISBAIJ 1699 M*/ 1700 1701 EXTERN_C_BEGIN 1702 #undef __FUNCT__ 1703 #define __FUNCT__ "MatCreate_MPISBAIJ" 1704 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPISBAIJ(Mat B) 1705 { 1706 Mat_MPISBAIJ *b; 1707 PetscErrorCode ierr; 1708 PetscTruth flg; 1709 1710 PetscFunctionBegin; 1711 1712 ierr = PetscNew(Mat_MPISBAIJ,&b);CHKERRQ(ierr); 1713 B->data = (void*)b; 1714 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1715 1716 B->ops->destroy = MatDestroy_MPISBAIJ; 1717 B->ops->view = MatView_MPISBAIJ; 1718 B->mapping = 0; 1719 B->factor = 0; 1720 B->assembled = PETSC_FALSE; 1721 1722 B->insertmode = NOT_SET_VALUES; 1723 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 1724 ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr); 1725 1726 /* build local table of row and column ownerships */ 1727 ierr = PetscMalloc((b->size+2)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr); 1728 1729 /* build cache for off array entries formed */ 1730 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 1731 b->donotstash = PETSC_FALSE; 1732 b->colmap = PETSC_NULL; 1733 b->garray = PETSC_NULL; 1734 b->roworiented = PETSC_TRUE; 1735 1736 #if defined(PETSC_USE_MAT_SINGLE) 1737 /* stuff for MatSetValues_XXX in single precision */ 1738 b->setvalueslen = 0; 1739 b->setvaluescopy = PETSC_NULL; 1740 #endif 1741 1742 /* stuff used in block assembly */ 1743 b->barray = 0; 1744 1745 /* stuff used for matrix vector multiply */ 1746 b->lvec = 0; 1747 b->Mvctx = 0; 1748 b->slvec0 = 0; 1749 b->slvec0b = 0; 1750 b->slvec1 = 0; 1751 b->slvec1a = 0; 1752 b->slvec1b = 0; 1753 b->sMvctx = 0; 1754 1755 /* stuff for MatGetRow() */ 1756 b->rowindices = 0; 1757 b->rowvalues = 0; 1758 b->getrowactive = PETSC_FALSE; 1759 1760 /* hash table stuff */ 1761 b->ht = 0; 1762 b->hd = 0; 1763 b->ht_size = 0; 1764 b->ht_flag = PETSC_FALSE; 1765 b->ht_fact = 0; 1766 b->ht_total_ct = 0; 1767 b->ht_insert_ct = 0; 1768 1769 b->in_loc = 0; 1770 b->v_loc = 0; 1771 b->n_loc = 0; 1772 ierr = PetscOptionsHasName(B->prefix,"-mat_use_hash_table",&flg);CHKERRQ(ierr); 1773 if (flg) { 1774 PetscReal fact = 1.39; 1775 ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr); 1776 ierr = PetscOptionsGetReal(B->prefix,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr); 1777 if (fact <= 1.0) fact = 1.39; 1778 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 1779 ierr = PetscInfo1(0,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 1780 } 1781 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1782 "MatStoreValues_MPISBAIJ", 1783 MatStoreValues_MPISBAIJ);CHKERRQ(ierr); 1784 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1785 "MatRetrieveValues_MPISBAIJ", 1786 MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr); 1787 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 1788 "MatGetDiagonalBlock_MPISBAIJ", 1789 MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr); 1790 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C", 1791 "MatMPISBAIJSetPreallocation_MPISBAIJ", 1792 MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr); 1793 B->symmetric = PETSC_TRUE; 1794 B->structurally_symmetric = PETSC_TRUE; 1795 B->symmetric_set = PETSC_TRUE; 1796 B->structurally_symmetric_set = PETSC_TRUE; 1797 PetscFunctionReturn(0); 1798 } 1799 EXTERN_C_END 1800 1801 /*MC 1802 MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices. 1803 1804 This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator, 1805 and MATMPISBAIJ otherwise. 1806 1807 Options Database Keys: 1808 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions() 1809 1810 Level: beginner 1811 1812 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ 1813 M*/ 1814 1815 EXTERN_C_BEGIN 1816 #undef __FUNCT__ 1817 #define __FUNCT__ "MatCreate_SBAIJ" 1818 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SBAIJ(Mat A) 1819 { 1820 PetscErrorCode ierr; 1821 PetscMPIInt size; 1822 1823 PetscFunctionBegin; 1824 ierr = PetscObjectChangeTypeName((PetscObject)A,MATSBAIJ);CHKERRQ(ierr); 1825 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 1826 if (size == 1) { 1827 ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr); 1828 } else { 1829 ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 1830 } 1831 PetscFunctionReturn(0); 1832 } 1833 EXTERN_C_END 1834 1835 #undef __FUNCT__ 1836 #define __FUNCT__ "MatMPISBAIJSetPreallocation" 1837 /*@C 1838 MatMPISBAIJSetPreallocation - For good matrix assembly performance 1839 the user should preallocate the matrix storage by setting the parameters 1840 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1841 performance can be increased by more than a factor of 50. 1842 1843 Collective on Mat 1844 1845 Input Parameters: 1846 + A - the matrix 1847 . bs - size of blockk 1848 . d_nz - number of block nonzeros per block row in diagonal portion of local 1849 submatrix (same for all local rows) 1850 . d_nnz - array containing the number of block nonzeros in the various block rows 1851 in the upper triangular and diagonal part of the in diagonal portion of the local 1852 (possibly different for each block row) or PETSC_NULL. You must leave room 1853 for the diagonal entry even if it is zero. 1854 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1855 submatrix (same for all local rows). 1856 - o_nnz - array containing the number of nonzeros in the various block rows of the 1857 off-diagonal portion of the local submatrix (possibly different for 1858 each block row) or PETSC_NULL. 1859 1860 1861 Options Database Keys: 1862 . -mat_no_unroll - uses code that does not unroll the loops in the 1863 block calculations (much slower) 1864 . -mat_block_size - size of the blocks to use 1865 1866 Notes: 1867 1868 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1869 than it must be used on all processors that share the object for that argument. 1870 1871 If the *_nnz parameter is given then the *_nz parameter is ignored 1872 1873 Storage Information: 1874 For a square global matrix we define each processor's diagonal portion 1875 to be its local rows and the corresponding columns (a square submatrix); 1876 each processor's off-diagonal portion encompasses the remainder of the 1877 local matrix (a rectangular submatrix). 1878 1879 The user can specify preallocated storage for the diagonal part of 1880 the local submatrix with either d_nz or d_nnz (not both). Set 1881 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1882 memory allocation. Likewise, specify preallocated storage for the 1883 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1884 1885 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1886 the figure below we depict these three local rows and all columns (0-11). 1887 1888 .vb 1889 0 1 2 3 4 5 6 7 8 9 10 11 1890 ------------------- 1891 row 3 | o o o d d d o o o o o o 1892 row 4 | o o o d d d o o o o o o 1893 row 5 | o o o d d d o o o o o o 1894 ------------------- 1895 .ve 1896 1897 Thus, any entries in the d locations are stored in the d (diagonal) 1898 submatrix, and any entries in the o locations are stored in the 1899 o (off-diagonal) submatrix. Note that the d matrix is stored in 1900 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 1901 1902 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 1903 plus the diagonal part of the d matrix, 1904 and o_nz should indicate the number of block nonzeros per row in the o matrix. 1905 In general, for PDE problems in which most nonzeros are near the diagonal, 1906 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1907 or you will get TERRIBLE performance; see the users' manual chapter on 1908 matrices. 1909 1910 Level: intermediate 1911 1912 .keywords: matrix, block, aij, compressed row, sparse, parallel 1913 1914 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1915 @*/ 1916 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 1917 { 1918 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 1919 1920 PetscFunctionBegin; 1921 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 1922 if (f) { 1923 ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 1924 } 1925 PetscFunctionReturn(0); 1926 } 1927 1928 #undef __FUNCT__ 1929 #define __FUNCT__ "MatCreateMPISBAIJ" 1930 /*@C 1931 MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format 1932 (block compressed row). For good matrix assembly performance 1933 the user should preallocate the matrix storage by setting the parameters 1934 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1935 performance can be increased by more than a factor of 50. 1936 1937 Collective on MPI_Comm 1938 1939 Input Parameters: 1940 + comm - MPI communicator 1941 . bs - size of blockk 1942 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1943 This value should be the same as the local size used in creating the 1944 y vector for the matrix-vector product y = Ax. 1945 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1946 This value should be the same as the local size used in creating the 1947 x vector for the matrix-vector product y = Ax. 1948 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 1949 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 1950 . d_nz - number of block nonzeros per block row in diagonal portion of local 1951 submatrix (same for all local rows) 1952 . d_nnz - array containing the number of block nonzeros in the various block rows 1953 in the upper triangular portion of the in diagonal portion of the local 1954 (possibly different for each block block row) or PETSC_NULL. 1955 You must leave room for the diagonal entry even if it is zero. 1956 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1957 submatrix (same for all local rows). 1958 - o_nnz - array containing the number of nonzeros in the various block rows of the 1959 off-diagonal portion of the local submatrix (possibly different for 1960 each block row) or PETSC_NULL. 1961 1962 Output Parameter: 1963 . A - the matrix 1964 1965 Options Database Keys: 1966 . -mat_no_unroll - uses code that does not unroll the loops in the 1967 block calculations (much slower) 1968 . -mat_block_size - size of the blocks to use 1969 . -mat_mpi - use the parallel matrix data structures even on one processor 1970 (defaults to using SeqBAIJ format on one processor) 1971 1972 Notes: 1973 The number of rows and columns must be divisible by blocksize. 1974 1975 The user MUST specify either the local or global matrix dimensions 1976 (possibly both). 1977 1978 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1979 than it must be used on all processors that share the object for that argument. 1980 1981 If the *_nnz parameter is given then the *_nz parameter is ignored 1982 1983 Storage Information: 1984 For a square global matrix we define each processor's diagonal portion 1985 to be its local rows and the corresponding columns (a square submatrix); 1986 each processor's off-diagonal portion encompasses the remainder of the 1987 local matrix (a rectangular submatrix). 1988 1989 The user can specify preallocated storage for the diagonal part of 1990 the local submatrix with either d_nz or d_nnz (not both). Set 1991 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1992 memory allocation. Likewise, specify preallocated storage for the 1993 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1994 1995 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1996 the figure below we depict these three local rows and all columns (0-11). 1997 1998 .vb 1999 0 1 2 3 4 5 6 7 8 9 10 11 2000 ------------------- 2001 row 3 | o o o d d d o o o o o o 2002 row 4 | o o o d d d o o o o o o 2003 row 5 | o o o d d d o o o o o o 2004 ------------------- 2005 .ve 2006 2007 Thus, any entries in the d locations are stored in the d (diagonal) 2008 submatrix, and any entries in the o locations are stored in the 2009 o (off-diagonal) submatrix. Note that the d matrix is stored in 2010 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 2011 2012 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 2013 plus the diagonal part of the d matrix, 2014 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2015 In general, for PDE problems in which most nonzeros are near the diagonal, 2016 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2017 or you will get TERRIBLE performance; see the users' manual chapter on 2018 matrices. 2019 2020 Level: intermediate 2021 2022 .keywords: matrix, block, aij, compressed row, sparse, parallel 2023 2024 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2025 @*/ 2026 2027 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPISBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2028 { 2029 PetscErrorCode ierr; 2030 PetscMPIInt size; 2031 2032 PetscFunctionBegin; 2033 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2034 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2035 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2036 if (size > 1) { 2037 ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr); 2038 ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2039 } else { 2040 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 2041 ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2042 } 2043 PetscFunctionReturn(0); 2044 } 2045 2046 2047 #undef __FUNCT__ 2048 #define __FUNCT__ "MatDuplicate_MPISBAIJ" 2049 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2050 { 2051 Mat mat; 2052 Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data; 2053 PetscErrorCode ierr; 2054 PetscInt len=0,nt,bs=matin->rmap.bs,mbs=oldmat->mbs; 2055 PetscScalar *array; 2056 2057 PetscFunctionBegin; 2058 *newmat = 0; 2059 ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr); 2060 ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr); 2061 ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 2062 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2063 ierr = PetscMapCopy(matin->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr); 2064 ierr = PetscMapCopy(matin->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr); 2065 2066 mat->factor = matin->factor; 2067 mat->preallocated = PETSC_TRUE; 2068 mat->assembled = PETSC_TRUE; 2069 mat->insertmode = NOT_SET_VALUES; 2070 2071 a = (Mat_MPISBAIJ*)mat->data; 2072 a->bs2 = oldmat->bs2; 2073 a->mbs = oldmat->mbs; 2074 a->nbs = oldmat->nbs; 2075 a->Mbs = oldmat->Mbs; 2076 a->Nbs = oldmat->Nbs; 2077 2078 2079 a->size = oldmat->size; 2080 a->rank = oldmat->rank; 2081 a->donotstash = oldmat->donotstash; 2082 a->roworiented = oldmat->roworiented; 2083 a->rowindices = 0; 2084 a->rowvalues = 0; 2085 a->getrowactive = PETSC_FALSE; 2086 a->barray = 0; 2087 a->rstartbs = oldmat->rstartbs; 2088 a->rendbs = oldmat->rendbs; 2089 a->cstartbs = oldmat->cstartbs; 2090 a->cendbs = oldmat->cendbs; 2091 2092 /* hash table stuff */ 2093 a->ht = 0; 2094 a->hd = 0; 2095 a->ht_size = 0; 2096 a->ht_flag = oldmat->ht_flag; 2097 a->ht_fact = oldmat->ht_fact; 2098 a->ht_total_ct = 0; 2099 a->ht_insert_ct = 0; 2100 2101 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr); 2102 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 2103 ierr = MatStashCreate_Private(matin->comm,matin->rmap.bs,&mat->bstash);CHKERRQ(ierr); 2104 if (oldmat->colmap) { 2105 #if defined (PETSC_USE_CTABLE) 2106 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2107 #else 2108 ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2109 ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2110 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2111 #endif 2112 } else a->colmap = 0; 2113 2114 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2115 ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2116 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2117 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 2118 } else a->garray = 0; 2119 2120 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2121 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2122 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2123 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2124 2125 ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr); 2126 ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr); 2127 ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr); 2128 ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr); 2129 2130 ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr); 2131 ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr); 2132 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr); 2133 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr); 2134 ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr); 2135 ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr); 2136 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr); 2137 ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr); 2138 ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr); 2139 ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr); 2140 ierr = PetscLogObjectParent(mat,a->slvec0b);CHKERRQ(ierr); 2141 ierr = PetscLogObjectParent(mat,a->slvec1a);CHKERRQ(ierr); 2142 ierr = PetscLogObjectParent(mat,a->slvec1b);CHKERRQ(ierr); 2143 2144 /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */ 2145 ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr); 2146 a->sMvctx = oldmat->sMvctx; 2147 ierr = PetscLogObjectParent(mat,a->sMvctx);CHKERRQ(ierr); 2148 2149 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2150 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2151 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2152 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2153 ierr = PetscFListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr); 2154 *newmat = mat; 2155 PetscFunctionReturn(0); 2156 } 2157 2158 #include "petscsys.h" 2159 2160 #undef __FUNCT__ 2161 #define __FUNCT__ "MatLoad_MPISBAIJ" 2162 PetscErrorCode MatLoad_MPISBAIJ(PetscViewer viewer, MatType type,Mat *newmat) 2163 { 2164 Mat A; 2165 PetscErrorCode ierr; 2166 PetscInt i,nz,j,rstart,rend; 2167 PetscScalar *vals,*buf; 2168 MPI_Comm comm = ((PetscObject)viewer)->comm; 2169 MPI_Status status; 2170 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,*locrowlens; 2171 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2172 PetscInt *procsnz = 0,jj,*mycols,*ibuf; 2173 PetscInt bs=1,Mbs,mbs,extra_rows; 2174 PetscInt *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2175 PetscInt dcount,kmax,k,nzcount,tmp; 2176 int fd; 2177 2178 PetscFunctionBegin; 2179 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2180 2181 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2182 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2183 if (!rank) { 2184 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2185 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2186 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2187 if (header[3] < 0) { 2188 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ"); 2189 } 2190 } 2191 2192 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2193 M = header[1]; N = header[2]; 2194 2195 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2196 2197 /* 2198 This code adds extra rows to make sure the number of rows is 2199 divisible by the blocksize 2200 */ 2201 Mbs = M/bs; 2202 extra_rows = bs - M + bs*(Mbs); 2203 if (extra_rows == bs) extra_rows = 0; 2204 else Mbs++; 2205 if (extra_rows &&!rank) { 2206 ierr = PetscInfo(0,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 2207 } 2208 2209 /* determine ownership of all rows */ 2210 mbs = Mbs/size + ((Mbs % size) > rank); 2211 m = mbs*bs; 2212 ierr = PetscMalloc(2*(size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr); 2213 browners = rowners + size + 1; 2214 ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2215 rowners[0] = 0; 2216 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2217 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2218 rstart = rowners[rank]; 2219 rend = rowners[rank+1]; 2220 2221 /* distribute row lengths to all processors */ 2222 ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscMPIInt),&locrowlens);CHKERRQ(ierr); 2223 if (!rank) { 2224 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2225 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2226 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2227 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr); 2228 for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2229 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2230 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2231 } else { 2232 ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2233 } 2234 2235 if (!rank) { /* procs[0] */ 2236 /* calculate the number of nonzeros on each processor */ 2237 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2238 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2239 for (i=0; i<size; i++) { 2240 for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 2241 procsnz[i] += rowlengths[j]; 2242 } 2243 } 2244 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2245 2246 /* determine max buffer needed and allocate it */ 2247 maxnz = 0; 2248 for (i=0; i<size; i++) { 2249 maxnz = PetscMax(maxnz,procsnz[i]); 2250 } 2251 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2252 2253 /* read in my part of the matrix column indices */ 2254 nz = procsnz[0]; 2255 ierr = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 2256 mycols = ibuf; 2257 if (size == 1) nz -= extra_rows; 2258 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2259 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2260 2261 /* read in every ones (except the last) and ship off */ 2262 for (i=1; i<size-1; i++) { 2263 nz = procsnz[i]; 2264 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2265 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2266 } 2267 /* read in the stuff for the last proc */ 2268 if (size != 1) { 2269 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2270 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2271 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2272 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 2273 } 2274 ierr = PetscFree(cols);CHKERRQ(ierr); 2275 } else { /* procs[i], i>0 */ 2276 /* determine buffer space needed for message */ 2277 nz = 0; 2278 for (i=0; i<m; i++) { 2279 nz += locrowlens[i]; 2280 } 2281 ierr = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 2282 mycols = ibuf; 2283 /* receive message of column indices*/ 2284 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2285 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2286 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2287 } 2288 2289 /* loop over local rows, determining number of off diagonal entries */ 2290 ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 2291 odlens = dlens + (rend-rstart); 2292 ierr = PetscMalloc(3*Mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr); 2293 ierr = PetscMemzero(mask,3*Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2294 masked1 = mask + Mbs; 2295 masked2 = masked1 + Mbs; 2296 rowcount = 0; nzcount = 0; 2297 for (i=0; i<mbs; i++) { 2298 dcount = 0; 2299 odcount = 0; 2300 for (j=0; j<bs; j++) { 2301 kmax = locrowlens[rowcount]; 2302 for (k=0; k<kmax; k++) { 2303 tmp = mycols[nzcount++]/bs; /* block col. index */ 2304 if (!mask[tmp]) { 2305 mask[tmp] = 1; 2306 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */ 2307 else masked1[dcount++] = tmp; /* entry in diag portion */ 2308 } 2309 } 2310 rowcount++; 2311 } 2312 2313 dlens[i] = dcount; /* d_nzz[i] */ 2314 odlens[i] = odcount; /* o_nzz[i] */ 2315 2316 /* zero out the mask elements we set */ 2317 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2318 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2319 } 2320 2321 /* create our matrix */ 2322 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 2323 ierr = MatSetSizes(A,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2324 ierr = MatSetType(A,type);CHKERRQ(ierr); 2325 ierr = MatMPISBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr); 2326 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2327 2328 if (!rank) { 2329 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2330 /* read in my part of the matrix numerical values */ 2331 nz = procsnz[0]; 2332 vals = buf; 2333 mycols = ibuf; 2334 if (size == 1) nz -= extra_rows; 2335 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2336 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2337 2338 /* insert into matrix */ 2339 jj = rstart*bs; 2340 for (i=0; i<m; i++) { 2341 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2342 mycols += locrowlens[i]; 2343 vals += locrowlens[i]; 2344 jj++; 2345 } 2346 2347 /* read in other processors (except the last one) and ship out */ 2348 for (i=1; i<size-1; i++) { 2349 nz = procsnz[i]; 2350 vals = buf; 2351 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2352 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2353 } 2354 /* the last proc */ 2355 if (size != 1){ 2356 nz = procsnz[i] - extra_rows; 2357 vals = buf; 2358 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2359 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2360 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 2361 } 2362 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2363 2364 } else { 2365 /* receive numeric values */ 2366 ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2367 2368 /* receive message of values*/ 2369 vals = buf; 2370 mycols = ibuf; 2371 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2372 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2373 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2374 2375 /* insert into matrix */ 2376 jj = rstart*bs; 2377 for (i=0; i<m; i++) { 2378 ierr = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2379 mycols += locrowlens[i]; 2380 vals += locrowlens[i]; 2381 jj++; 2382 } 2383 } 2384 2385 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2386 ierr = PetscFree(buf);CHKERRQ(ierr); 2387 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2388 ierr = PetscFree(rowners);CHKERRQ(ierr); 2389 ierr = PetscFree(dlens);CHKERRQ(ierr); 2390 ierr = PetscFree(mask);CHKERRQ(ierr); 2391 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2392 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2393 *newmat = A; 2394 PetscFunctionReturn(0); 2395 } 2396 2397 #undef __FUNCT__ 2398 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor" 2399 /*XXXXX@ 2400 MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2401 2402 Input Parameters: 2403 . mat - the matrix 2404 . fact - factor 2405 2406 Collective on Mat 2407 2408 Level: advanced 2409 2410 Notes: 2411 This can also be set by the command line option: -mat_use_hash_table fact 2412 2413 .keywords: matrix, hashtable, factor, HT 2414 2415 .seealso: MatSetOption() 2416 @XXXXX*/ 2417 2418 2419 #undef __FUNCT__ 2420 #define __FUNCT__ "MatGetRowMax_MPISBAIJ" 2421 PetscErrorCode MatGetRowMax_MPISBAIJ(Mat A,Vec v) 2422 { 2423 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 2424 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(a->B)->data; 2425 PetscReal atmp; 2426 PetscReal *work,*svalues,*rvalues; 2427 PetscErrorCode ierr; 2428 PetscInt i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol; 2429 PetscMPIInt rank,size; 2430 PetscInt *rowners_bs,dest,count,source; 2431 PetscScalar *va; 2432 MatScalar *ba; 2433 MPI_Status stat; 2434 2435 PetscFunctionBegin; 2436 ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr); 2437 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2438 2439 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 2440 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 2441 2442 bs = A->rmap.bs; 2443 mbs = a->mbs; 2444 Mbs = a->Mbs; 2445 ba = b->a; 2446 bi = b->i; 2447 bj = b->j; 2448 2449 /* find ownerships */ 2450 rowners_bs = A->rmap.range; 2451 2452 /* each proc creates an array to be distributed */ 2453 ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr); 2454 ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr); 2455 2456 /* row_max for B */ 2457 if (rank != size-1){ 2458 for (i=0; i<mbs; i++) { 2459 ncols = bi[1] - bi[0]; bi++; 2460 brow = bs*i; 2461 for (j=0; j<ncols; j++){ 2462 bcol = bs*(*bj); 2463 for (kcol=0; kcol<bs; kcol++){ 2464 col = bcol + kcol; /* local col index */ 2465 col += rowners_bs[rank+1]; /* global col index */ 2466 for (krow=0; krow<bs; krow++){ 2467 atmp = PetscAbsScalar(*ba); ba++; 2468 row = brow + krow; /* local row index */ 2469 if (PetscRealPart(va[row]) < atmp) va[row] = atmp; 2470 if (work[col] < atmp) work[col] = atmp; 2471 } 2472 } 2473 bj++; 2474 } 2475 } 2476 2477 /* send values to its owners */ 2478 for (dest=rank+1; dest<size; dest++){ 2479 svalues = work + rowners_bs[dest]; 2480 count = rowners_bs[dest+1]-rowners_bs[dest]; 2481 ierr = MPI_Send(svalues,count,MPIU_REAL,dest,rank,A->comm);CHKERRQ(ierr); 2482 } 2483 } 2484 2485 /* receive values */ 2486 if (rank){ 2487 rvalues = work; 2488 count = rowners_bs[rank+1]-rowners_bs[rank]; 2489 for (source=0; source<rank; source++){ 2490 ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,A->comm,&stat);CHKERRQ(ierr); 2491 /* process values */ 2492 for (i=0; i<count; i++){ 2493 if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i]; 2494 } 2495 } 2496 } 2497 2498 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2499 ierr = PetscFree(work);CHKERRQ(ierr); 2500 PetscFunctionReturn(0); 2501 } 2502 2503 #undef __FUNCT__ 2504 #define __FUNCT__ "MatRelax_MPISBAIJ" 2505 PetscErrorCode MatRelax_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2506 { 2507 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2508 PetscErrorCode ierr; 2509 PetscInt mbs=mat->mbs,bs=matin->rmap.bs; 2510 PetscScalar *x,*b,*ptr,zero=0.0; 2511 Vec bb1; 2512 2513 PetscFunctionBegin; 2514 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 2515 if (bs > 1) 2516 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2517 2518 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2519 if ( flag & SOR_ZERO_INITIAL_GUESS ) { 2520 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2521 its--; 2522 } 2523 2524 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2525 while (its--){ 2526 2527 /* lower triangular part: slvec0b = - B^T*xx */ 2528 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2529 2530 /* copy xx into slvec0a */ 2531 ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2532 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2533 ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2534 ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2535 2536 ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr); 2537 2538 /* copy bb into slvec1a */ 2539 ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2540 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 2541 ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2542 ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2543 2544 /* set slvec1b = 0 */ 2545 ierr = VecSet(mat->slvec1b,zero);CHKERRQ(ierr); 2546 2547 ierr = VecScatterBegin(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr); 2548 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2549 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 2550 ierr = VecScatterEnd(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr); 2551 2552 /* upper triangular part: bb1 = bb1 - B*x */ 2553 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr); 2554 2555 /* local diagonal sweep */ 2556 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2557 } 2558 ierr = VecDestroy(bb1);CHKERRQ(ierr); 2559 } else { 2560 SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2561 } 2562 PetscFunctionReturn(0); 2563 } 2564 2565 #undef __FUNCT__ 2566 #define __FUNCT__ "MatRelax_MPISBAIJ_2comm" 2567 PetscErrorCode MatRelax_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2568 { 2569 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2570 PetscErrorCode ierr; 2571 Vec lvec1,bb1; 2572 2573 PetscFunctionBegin; 2574 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 2575 if (matin->rmap.bs > 1) 2576 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2577 2578 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2579 if ( flag & SOR_ZERO_INITIAL_GUESS ) { 2580 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2581 its--; 2582 } 2583 2584 ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr); 2585 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2586 while (its--){ 2587 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 2588 2589 /* lower diagonal part: bb1 = bb - B^T*xx */ 2590 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr); 2591 ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr); 2592 2593 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 2594 ierr = VecCopy(bb,bb1);CHKERRQ(ierr); 2595 ierr = VecScatterBegin(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr); 2596 2597 /* upper diagonal part: bb1 = bb1 - B*x */ 2598 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2599 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 2600 2601 ierr = VecScatterEnd(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr); 2602 2603 /* diagonal sweep */ 2604 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2605 } 2606 ierr = VecDestroy(lvec1);CHKERRQ(ierr); 2607 ierr = VecDestroy(bb1);CHKERRQ(ierr); 2608 } else { 2609 SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2610 } 2611 PetscFunctionReturn(0); 2612 } 2613 2614