#define PETSCMAT_DLL

#include "src/mat/impls/baij/mpi/mpibaij.h"   /*I  "petscmat.h"  I*/
#include "mpisbaij.h"
#include "src/mat/impls/sbaij/seq/sbaij.h"

EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat);
EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat);
EXTERN PetscErrorCode DisAssemble_MPISBAIJ(Mat);
EXTERN PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt);
EXTERN PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar[]);
EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar[]);
EXTERN PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
EXTERN PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
EXTERN PetscErrorCode MatPrintHelp_SeqSBAIJ(Mat);
EXTERN PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*);
EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar*);
EXTERN PetscErrorCode MatGetRowMax_MPISBAIJ(Mat,Vec);
EXTERN PetscErrorCode MatRelax_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec);

/* UGLY, ugly, ugly
   When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does
   not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and
   inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ()
   converts the entries into single precision and then calls ..._MatScalar() to put them
   into the single precision data structures.
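
   As an illustration only (a minimal sketch, not the actual wrapper defined below),
   the PETSC_USE_MAT_SINGLE path amounts to copying the caller's PetscScalar values
   into a MatScalar scratch buffer and then delegating:

       for (i=0; i<m*n; i++) vsingle[i] = v[i];
       MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);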
30 */ 31 #if defined(PETSC_USE_MAT_SINGLE) 32 EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode); 33 EXTERN PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode); 34 EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode); 35 EXTERN PetscErrorCode MatSetValues_MPISBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode); 36 EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode); 37 #else 38 #define MatSetValuesBlocked_SeqSBAIJ_MatScalar MatSetValuesBlocked_SeqSBAIJ 39 #define MatSetValues_MPISBAIJ_MatScalar MatSetValues_MPISBAIJ 40 #define MatSetValuesBlocked_MPISBAIJ_MatScalar MatSetValuesBlocked_MPISBAIJ 41 #define MatSetValues_MPISBAIJ_HT_MatScalar MatSetValues_MPISBAIJ_HT 42 #define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar MatSetValuesBlocked_MPISBAIJ_HT 43 #endif 44 45 EXTERN_C_BEGIN 46 #undef __FUNCT__ 47 #define __FUNCT__ "MatStoreValues_MPISBAIJ" 48 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPISBAIJ(Mat mat) 49 { 50 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 51 PetscErrorCode ierr; 52 53 PetscFunctionBegin; 54 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 55 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 56 PetscFunctionReturn(0); 57 } 58 EXTERN_C_END 59 60 EXTERN_C_BEGIN 61 #undef __FUNCT__ 62 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ" 63 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPISBAIJ(Mat mat) 64 { 65 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 66 PetscErrorCode ierr; 67 68 PetscFunctionBegin; 69 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 70 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 71 PetscFunctionReturn(0); 72 } 73 EXTERN_C_END 74 75 76 #define CHUNKSIZE 10 77 78 #define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \ 79 { \ 80 \ 81 brow = row/bs; \ 82 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 83 rmax = aimax[brow]; nrow = ailen[brow]; \ 84 bcol = col/bs; \ 85 ridx = row % bs; cidx = col % bs; \ 86 low = 0; high = nrow; \ 87 while (high-low > 3) { \ 88 t = (low+high)/2; \ 89 if (rp[t] > bcol) high = t; \ 90 else low = t; \ 91 } \ 92 for (_i=low; _i<high; _i++) { \ 93 if (rp[_i] > bcol) break; \ 94 if (rp[_i] == bcol) { \ 95 bap = ap + bs2*_i + bs*cidx + ridx; \ 96 if (addv == ADD_VALUES) *bap += value; \ 97 else *bap = value; \ 98 goto a_noinsert; \ 99 } \ 100 } \ 101 if (a->nonew == 1) goto a_noinsert; \ 102 if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 103 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew); \ 104 N = nrow++ - 1; \ 105 /* shift up all the later entries in this row */ \ 106 for (ii=N; ii>=_i; ii--) { \ 107 rp[ii+1] = rp[ii]; \ 108 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 109 } \ 110 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 111 rp[_i] = bcol; \ 112 ap[bs2*_i + bs*cidx + ridx] = value; \ 113 a_noinsert:; \ 114 ailen[brow] = nrow; \ 115 } 116 #ifndef MatSetValues_SeqBAIJ_B_Private 117 #define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \ 118 { \ 119 brow = row/bs; \ 120 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 
  rmax = bimax[brow]; nrow = bilen[brow]; \
  bcol = col/bs; \
  ridx = row % bs; cidx = col % bs; \
  low = 0; high = nrow; \
  while (high-low > 3) { \
    t = (low+high)/2; \
    if (rp[t] > bcol) high = t; \
    else              low  = t; \
  } \
  for (_i=low; _i<high; _i++) { \
    if (rp[_i] > bcol) break; \
    if (rp[_i] == bcol) { \
      bap = ap + bs2*_i + bs*cidx + ridx; \
      if (addv == ADD_VALUES) *bap += value; \
      else                    *bap  = value; \
      goto b_noinsert; \
    } \
  } \
  if (b->nonew == 1) goto b_noinsert; \
  if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
  MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew); \
  N = nrow++ - 1; \
  /* shift up all the later entries in this row */ \
  for (ii=N; ii>=_i; ii--) { \
    rp[ii+1] = rp[ii]; \
    ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
  } \
  if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \
  rp[_i]                      = bcol; \
  ap[bs2*_i + bs*cidx + ridx] = value; \
  b_noinsert:; \
  bilen[brow] = nrow; \
}
#endif

#if defined(PETSC_USE_MAT_SINGLE)
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPISBAIJ"
PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,N = m*n;
  MatScalar      *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen = N;
  }
  vsingle = b->setvaluescopy;

  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,N = m*n*b->bs2;
  MatScalar      *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen = N;
  }
  vsingle = b->setvaluescopy;
  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif

/* Only add/insert a(i,j) with i<=j (blocks).
   Any a(i,j) with i>j input by the user is ignored.
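
   For example (a sketch of typical caller usage, not code from this file; the loop
   bounds and the hypothetical get_entry() routine are purely illustrative), a caller
   assembling a symmetric matrix passes only the upper triangle:

       for (i=rstart; i<rend; i++) {
         for (j=i; j<N; j++) {
           PetscScalar val = get_entry(i,j);
           MatSetValues(mat,1,&i,1,&j,&val,INSERT_VALUES);
         }
       }

   Entries with i>j are silently dropped when -mat_ignore_lower_triangular (or
   MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR)) is in effect; otherwise they
   trigger the error below.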
207 */ 208 #undef __FUNCT__ 209 #define __FUNCT__ "MatSetValues_MPISBAIJ_MatScalar" 210 PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 211 { 212 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 213 MatScalar value; 214 PetscTruth roworiented = baij->roworiented; 215 PetscErrorCode ierr; 216 PetscInt i,j,row,col; 217 PetscInt rstart_orig=mat->rmap.rstart; 218 PetscInt rend_orig=mat->rmap.rend,cstart_orig=mat->cmap.rstart; 219 PetscInt cend_orig=mat->cmap.rend,bs=mat->rmap.bs; 220 221 /* Some Variables required in the macro */ 222 Mat A = baij->A; 223 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data; 224 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 225 MatScalar *aa=a->a; 226 227 Mat B = baij->B; 228 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 229 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 230 MatScalar *ba=b->a; 231 232 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 233 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 234 MatScalar *ap,*bap; 235 236 /* for stash */ 237 PetscInt n_loc, *in_loc = PETSC_NULL; 238 MatScalar *v_loc = PETSC_NULL; 239 240 PetscFunctionBegin; 241 242 if (!baij->donotstash){ 243 if (n > baij->n_loc) { 244 ierr = PetscFree(baij->in_loc);CHKERRQ(ierr); 245 ierr = PetscFree(baij->v_loc);CHKERRQ(ierr); 246 ierr = PetscMalloc(n*sizeof(PetscInt),&baij->in_loc);CHKERRQ(ierr); 247 ierr = PetscMalloc(n*sizeof(MatScalar),&baij->v_loc);CHKERRQ(ierr); 248 baij->n_loc = n; 249 } 250 in_loc = baij->in_loc; 251 v_loc = baij->v_loc; 252 } 253 254 for (i=0; i<m; i++) { 255 if (im[i] < 0) continue; 256 #if defined(PETSC_USE_DEBUG) 257 if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1); 258 #endif 259 if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */ 260 row = im[i] - rstart_orig; /* local row index */ 261 for (j=0; j<n; j++) { 262 if (im[i]/bs > in[j]/bs){ 263 if (a->ignore_ltriangular){ 264 continue; /* ignore lower triangular blocks */ 265 } else { 266 SETERRQ(PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. 
Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR)"); 267 } 268 } 269 if (in[j] >= cstart_orig && in[j] < cend_orig){ /* diag entry (A) */ 270 col = in[j] - cstart_orig; /* local col index */ 271 brow = row/bs; bcol = col/bs; 272 if (brow > bcol) continue; /* ignore lower triangular blocks of A */ 273 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 274 MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv); 275 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 276 } else if (in[j] < 0) continue; 277 #if defined(PETSC_USE_DEBUG) 278 else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);} 279 #endif 280 else { /* off-diag entry (B) */ 281 if (mat->was_assembled) { 282 if (!baij->colmap) { 283 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 284 } 285 #if defined (PETSC_USE_CTABLE) 286 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 287 col = col - 1; 288 #else 289 col = baij->colmap[in[j]/bs] - 1; 290 #endif 291 if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) { 292 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 293 col = in[j]; 294 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 295 B = baij->B; 296 b = (Mat_SeqBAIJ*)(B)->data; 297 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 298 ba=b->a; 299 } else col += in[j]%bs; 300 } else col = in[j]; 301 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 302 MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv); 303 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 304 } 305 } 306 } else { /* off processor entry */ 307 if (!baij->donotstash) { 308 n_loc = 0; 309 for (j=0; j<n; j++){ 310 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 311 in_loc[n_loc] = in[j]; 312 if (roworiented) { 313 v_loc[n_loc] = v[i*n+j]; 314 } else { 315 v_loc[n_loc] = v[j*m+i]; 316 } 317 n_loc++; 318 } 319 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr); 320 } 321 } 322 } 323 PetscFunctionReturn(0); 324 } 325 326 #undef __FUNCT__ 327 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_MatScalar" 328 PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 329 { 330 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 331 const MatScalar *value; 332 MatScalar *barray=baij->barray; 333 PetscTruth roworiented = baij->roworiented; 334 PetscErrorCode ierr; 335 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 336 PetscInt rend=baij->rendbs,cstart=baij->rstartbs,stepval; 337 PetscInt cend=baij->rendbs,bs=mat->rmap.bs,bs2=baij->bs2; 338 339 PetscFunctionBegin; 340 if(!barray) { 341 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 342 baij->barray = barray; 343 } 344 345 if (roworiented) { 346 stepval = (n-1)*bs; 347 } else { 348 stepval = (m-1)*bs; 349 } 350 for (i=0; i<m; i++) { 351 if (im[i] < 0) continue; 352 #if defined(PETSC_USE_DEBUG) 353 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 354 #endif 355 if (im[i] >= rstart && im[i] < rend) { 356 row = im[i] - rstart; 357 for (j=0; j<n; j++) { 358 /* If NumCol = 1 then a copy is not required */ 359 if ((roworiented) && (n == 1)) { 360 barray = (MatScalar*) v + i*bs2; 361 } else if((!roworiented) && (m == 1)) { 362 barray = 
(MatScalar*) v + j*bs2; 363 } else { /* Here a copy is required */ 364 if (roworiented) { 365 value = v + i*(stepval+bs)*bs + j*bs; 366 } else { 367 value = v + j*(stepval+bs)*bs + i*bs; 368 } 369 for (ii=0; ii<bs; ii++,value+=stepval) { 370 for (jj=0; jj<bs; jj++) { 371 *barray++ = *value++; 372 } 373 } 374 barray -=bs2; 375 } 376 377 if (in[j] >= cstart && in[j] < cend){ 378 col = in[j] - cstart; 379 ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 380 } 381 else if (in[j] < 0) continue; 382 #if defined(PETSC_USE_DEBUG) 383 else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);} 384 #endif 385 else { 386 if (mat->was_assembled) { 387 if (!baij->colmap) { 388 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 389 } 390 391 #if defined(PETSC_USE_DEBUG) 392 #if defined (PETSC_USE_CTABLE) 393 { PetscInt data; 394 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 395 if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 396 } 397 #else 398 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 399 #endif 400 #endif 401 #if defined (PETSC_USE_CTABLE) 402 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 403 col = (col - 1)/bs; 404 #else 405 col = (baij->colmap[in[j]] - 1)/bs; 406 #endif 407 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 408 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 409 col = in[j]; 410 } 411 } 412 else col = in[j]; 413 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 414 } 415 } 416 } else { 417 if (!baij->donotstash) { 418 if (roworiented) { 419 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 420 } else { 421 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 422 } 423 } 424 } 425 } 426 PetscFunctionReturn(0); 427 } 428 429 #undef __FUNCT__ 430 #define __FUNCT__ "MatGetValues_MPISBAIJ" 431 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 432 { 433 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 434 PetscErrorCode ierr; 435 PetscInt bs=mat->rmap.bs,i,j,bsrstart = mat->rmap.rstart,bsrend = mat->rmap.rend; 436 PetscInt bscstart = mat->cmap.rstart,bscend = mat->cmap.rend,row,col,data; 437 438 PetscFunctionBegin; 439 for (i=0; i<m; i++) { 440 if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); 441 if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1); 442 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 443 row = idxm[i] - bsrstart; 444 for (j=0; j<n; j++) { 445 if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); 446 if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1); 447 if (idxn[j] >= bscstart && idxn[j] < bscend){ 448 col = idxn[j] - bscstart; 449 ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 450 } else { 451 if (!baij->colmap) { 452 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 453 } 454 #if defined (PETSC_USE_CTABLE) 455 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 456 data --; 457 #else 458 data = baij->colmap[idxn[j]/bs]-1; 459 #endif 460 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 461 else { 462 col = data + idxn[j]%bs; 463 
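            /* 'data' is the first local column (within the off-diagonal matrix B) of the
               block that owns global column idxn[j]; adding idxn[j]%bs selects the requested
               column inside that bs x bs block before the value is looked up in B. */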
ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 464 } 465 } 466 } 467 } else { 468 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 469 } 470 } 471 PetscFunctionReturn(0); 472 } 473 474 #undef __FUNCT__ 475 #define __FUNCT__ "MatNorm_MPISBAIJ" 476 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm) 477 { 478 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 479 PetscErrorCode ierr; 480 PetscReal sum[2],*lnorm2; 481 482 PetscFunctionBegin; 483 if (baij->size == 1) { 484 ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr); 485 } else { 486 if (type == NORM_FROBENIUS) { 487 ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr); 488 ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr); 489 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++; /* squar power of norm(A) */ 490 ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr); 491 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--; /* squar power of norm(B) */ 492 ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 493 *norm = sqrt(sum[0] + 2*sum[1]); 494 ierr = PetscFree(lnorm2);CHKERRQ(ierr); 495 } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */ 496 Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data; 497 Mat_SeqBAIJ *bmat=(Mat_SeqBAIJ*)baij->B->data; 498 PetscReal *rsum,*rsum2,vabs; 499 PetscInt *jj,*garray=baij->garray,rstart=baij->rstartbs,nz; 500 PetscInt brow,bcol,col,bs=baij->A->rmap.bs,row,grow,gcol,mbs=amat->mbs; 501 MatScalar *v; 502 503 ierr = PetscMalloc((2*mat->cmap.N+1)*sizeof(PetscReal),&rsum);CHKERRQ(ierr); 504 rsum2 = rsum + mat->cmap.N; 505 ierr = PetscMemzero(rsum,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr); 506 /* Amat */ 507 v = amat->a; jj = amat->j; 508 for (brow=0; brow<mbs; brow++) { 509 grow = bs*(rstart + brow); 510 nz = amat->i[brow+1] - amat->i[brow]; 511 for (bcol=0; bcol<nz; bcol++){ 512 gcol = bs*(rstart + *jj); jj++; 513 for (col=0; col<bs; col++){ 514 for (row=0; row<bs; row++){ 515 vabs = PetscAbsScalar(*v); v++; 516 rsum[gcol+col] += vabs; 517 /* non-diagonal block */ 518 if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs; 519 } 520 } 521 } 522 } 523 /* Bmat */ 524 v = bmat->a; jj = bmat->j; 525 for (brow=0; brow<mbs; brow++) { 526 grow = bs*(rstart + brow); 527 nz = bmat->i[brow+1] - bmat->i[brow]; 528 for (bcol=0; bcol<nz; bcol++){ 529 gcol = bs*garray[*jj]; jj++; 530 for (col=0; col<bs; col++){ 531 for (row=0; row<bs; row++){ 532 vabs = PetscAbsScalar(*v); v++; 533 rsum[gcol+col] += vabs; 534 rsum[grow+row] += vabs; 535 } 536 } 537 } 538 } 539 ierr = MPI_Allreduce(rsum,rsum2,mat->cmap.N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 540 *norm = 0.0; 541 for (col=0; col<mat->cmap.N; col++) { 542 if (rsum2[col] > *norm) *norm = rsum2[col]; 543 } 544 ierr = PetscFree(rsum);CHKERRQ(ierr); 545 } else { 546 SETERRQ(PETSC_ERR_SUP,"No support for this norm yet"); 547 } 548 } 549 PetscFunctionReturn(0); 550 } 551 552 #undef __FUNCT__ 553 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ" 554 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode) 555 { 556 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 557 PetscErrorCode ierr; 558 PetscInt nstash,reallocs; 559 InsertMode addv; 560 561 PetscFunctionBegin; 562 if (baij->donotstash) { 563 PetscFunctionReturn(0); 564 } 565 566 /* make sure all processors are either in INSERTMODE or ADDMODE */ 567 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 568 if (addv == (ADD_VALUES|INSERT_VALUES)) { 569 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some 
processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,mat->rmap.range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(0,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(0,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
  Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col,other_disassembled;
  PetscMPIInt    n;
  PetscTruth     r1,r2,r3;
  MatScalar      *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;

  if (!baij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble.
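
     (The MPI_Allreduce with MPI_PROD below forms the product of every process's
     was_assembled flag: the result is nonzero only when all processes still hold an
     assembled nonzero pattern, so a zero result means at least one process has
     disassembled and this process must disassemble as well before reassembling.)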
*/ 648 /* 649 if nonzero structure of submatrix B cannot change then we know that 650 no processor disassembled thus we can skip this stuff 651 */ 652 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 653 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 654 if (mat->was_assembled && !other_disassembled) { 655 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 656 } 657 } 658 659 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 660 ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */ 661 } 662 ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */ 663 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 664 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 665 666 ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr); 667 baij->rowvalues = 0; 668 669 PetscFunctionReturn(0); 670 } 671 672 #undef __FUNCT__ 673 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket" 674 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 675 { 676 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 677 PetscErrorCode ierr; 678 PetscInt bs = mat->rmap.bs; 679 PetscMPIInt size = baij->size,rank = baij->rank; 680 PetscTruth iascii,isdraw; 681 PetscViewer sviewer; 682 PetscViewerFormat format; 683 684 PetscFunctionBegin; 685 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 686 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 687 if (iascii) { 688 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 689 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 690 MatInfo info; 691 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 692 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 693 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n", 694 rank,mat->rmap.N,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs, 695 mat->rmap.bs,(PetscInt)info.memory);CHKERRQ(ierr); 696 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 697 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr); 698 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 699 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr); 700 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 701 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 702 PetscFunctionReturn(0); 703 } else if (format == PETSC_VIEWER_ASCII_INFO) { 704 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 705 PetscFunctionReturn(0); 706 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 707 PetscFunctionReturn(0); 708 } 709 } 710 711 if (isdraw) { 712 PetscDraw draw; 713 PetscTruth isnull; 714 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 715 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 716 } 717 718 if (size == 1) { 719 ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr); 720 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 721 } else { 722 /* assemble the entire matrix onto first processor. */ 723 Mat A; 724 Mat_SeqSBAIJ *Aloc; 725 Mat_SeqBAIJ *Bloc; 726 PetscInt M = mat->rmap.N,N = mat->cmap.N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 727 MatScalar *a; 728 729 /* Should this be the same type as mat? 
*/
    ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(A,mat->rmap.bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqSBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = mat->rmap.rstart + bs*i;
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = mat->cmap.rstart + aj[j]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Bloc = (Mat_SeqBAIJ*)baij->B->data;
    ai   = Bloc->i; aj = Bloc->j; a = Bloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = mat->rmap.rstart + bs*i;
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPISBAIJ"
PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (iascii || isdraw || issocket || isbinary) {
    ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPISBAIJ"
PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap.N,mat->cmap.N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr =
MatDestroy(baij->A);CHKERRQ(ierr); 822 ierr = MatDestroy(baij->B);CHKERRQ(ierr); 823 #if defined (PETSC_USE_CTABLE) 824 if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);} 825 #else 826 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 827 #endif 828 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 829 if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 830 if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 831 if (baij->slvec0) { 832 ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr); 833 ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr); 834 } 835 if (baij->slvec1) { 836 ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr); 837 ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr); 838 ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr); 839 } 840 if (baij->sMvctx) {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);} 841 ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr); 842 ierr = PetscFree(baij->barray);CHKERRQ(ierr); 843 ierr = PetscFree(baij->hd);CHKERRQ(ierr); 844 #if defined(PETSC_USE_MAT_SINGLE) 845 ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr); 846 #endif 847 ierr = PetscFree(baij->in_loc);CHKERRQ(ierr); 848 ierr = PetscFree(baij->v_loc);CHKERRQ(ierr); 849 ierr = PetscFree(baij->rangebs);CHKERRQ(ierr); 850 ierr = PetscFree(baij);CHKERRQ(ierr); 851 852 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 853 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 854 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 855 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 856 PetscFunctionReturn(0); 857 } 858 859 #undef __FUNCT__ 860 #define __FUNCT__ "MatMult_MPISBAIJ" 861 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy) 862 { 863 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 864 PetscErrorCode ierr; 865 PetscInt nt,mbs=a->mbs,bs=A->rmap.bs; 866 PetscScalar *x,*from,zero=0.0; 867 868 PetscFunctionBegin; 869 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 870 if (nt != A->cmap.n) { 871 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 872 } 873 874 /* diagonal part */ 875 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 876 ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr); 877 878 /* subdiagonal part */ 879 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 880 CHKMEMQ; 881 /* copy x into the vec slvec0 */ 882 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 883 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 884 CHKMEMQ; 885 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 886 CHKMEMQ; 887 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 888 889 CHKMEMQ; 890 ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr); 891 CHKMEMQ; 892 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 893 CHKMEMQ; 894 ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr); 895 CHKMEMQ; 896 /* supperdiagonal part */ 897 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 898 CHKMEMQ; 899 PetscFunctionReturn(0); 900 } 901 902 #undef __FUNCT__ 903 #define __FUNCT__ "MatMult_MPISBAIJ_2comm" 904 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy) 905 { 906 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 907 PetscErrorCode ierr; 908 PetscInt nt; 909 910 PetscFunctionBegin; 911 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 
  if (nt != A->cmap.n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  }
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->rmap.N) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  }

  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  /* do diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  /* do superdiagonal part */
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  /* do subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPISBAIJ"
PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       mbs=a->mbs,bs=A->rmap.bs;
  PetscScalar    *x,*from,zero=0.0;

  PetscFunctionBegin;
  /*
  PetscSynchronizedPrintf(A->comm," MatMultAdd is called ...\n");
  PetscSynchronizedFlush(A->comm);
  */
  /* diagonal part */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);

  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  /* do diagonal part */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* do superdiagonal part */
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);

  /* do subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
PetscErrorCode ierr; 1004 1005 PetscFunctionBegin; 1006 /* if (a->rmap.N != a->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */ 1007 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1008 PetscFunctionReturn(0); 1009 } 1010 1011 #undef __FUNCT__ 1012 #define __FUNCT__ "MatScale_MPISBAIJ" 1013 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa) 1014 { 1015 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1016 PetscErrorCode ierr; 1017 1018 PetscFunctionBegin; 1019 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1020 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1021 PetscFunctionReturn(0); 1022 } 1023 1024 #undef __FUNCT__ 1025 #define __FUNCT__ "MatGetRow_MPISBAIJ" 1026 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1027 { 1028 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 1029 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1030 PetscErrorCode ierr; 1031 PetscInt bs = matin->rmap.bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1032 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap.rstart,brend = matin->rmap.rend; 1033 PetscInt *cmap,*idx_p,cstart = mat->rstartbs; 1034 1035 PetscFunctionBegin; 1036 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1037 mat->getrowactive = PETSC_TRUE; 1038 1039 if (!mat->rowvalues && (idx || v)) { 1040 /* 1041 allocate enough space to hold information from the longest row. 1042 */ 1043 Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data; 1044 Mat_SeqBAIJ *Ba = (Mat_SeqBAIJ*)mat->B->data; 1045 PetscInt max = 1,mbs = mat->mbs,tmp; 1046 for (i=0; i<mbs; i++) { 1047 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */ 1048 if (max < tmp) { max = tmp; } 1049 } 1050 ierr = PetscMalloc(max*bs2*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1051 mat->rowindices = (PetscInt*)(mat->rowvalues + max*bs2); 1052 } 1053 1054 if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows") 1055 lrow = row - brstart; /* local row index */ 1056 1057 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1058 if (!v) {pvA = 0; pvB = 0;} 1059 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1060 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1061 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1062 nztot = nzA + nzB; 1063 1064 cmap = mat->garray; 1065 if (v || idx) { 1066 if (nztot) { 1067 /* Sort by increasing column numbers, assuming A and B already sorted */ 1068 PetscInt imark = -1; 1069 if (v) { 1070 *v = v_p = mat->rowvalues; 1071 for (i=0; i<nzB; i++) { 1072 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1073 else break; 1074 } 1075 imark = i; 1076 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1077 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1078 } 1079 if (idx) { 1080 *idx = idx_p = mat->rowindices; 1081 if (imark > -1) { 1082 for (i=0; i<imark; i++) { 1083 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1084 } 1085 } else { 1086 for (i=0; i<nzB; i++) { 1087 if (cmap[cworkB[i]/bs] < cstart) 1088 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1089 else break; 1090 } 1091 imark = i; 1092 } 1093 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1094 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1095 } 1096 } else { 1097 if (idx) *idx = 0; 1098 if (v) *v = 0; 1099 } 1100 } 1101 *nz = nztot; 1102 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1103 ierr = 
(*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1104 PetscFunctionReturn(0); 1105 } 1106 1107 #undef __FUNCT__ 1108 #define __FUNCT__ "MatRestoreRow_MPISBAIJ" 1109 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1110 { 1111 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1112 1113 PetscFunctionBegin; 1114 if (!baij->getrowactive) { 1115 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1116 } 1117 baij->getrowactive = PETSC_FALSE; 1118 PetscFunctionReturn(0); 1119 } 1120 1121 #undef __FUNCT__ 1122 #define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ" 1123 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A) 1124 { 1125 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1126 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1127 1128 PetscFunctionBegin; 1129 aA->getrow_utriangular = PETSC_TRUE; 1130 PetscFunctionReturn(0); 1131 } 1132 #undef __FUNCT__ 1133 #define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ" 1134 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A) 1135 { 1136 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1137 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1138 1139 PetscFunctionBegin; 1140 aA->getrow_utriangular = PETSC_FALSE; 1141 PetscFunctionReturn(0); 1142 } 1143 1144 #undef __FUNCT__ 1145 #define __FUNCT__ "MatRealPart_MPISBAIJ" 1146 PetscErrorCode MatRealPart_MPISBAIJ(Mat A) 1147 { 1148 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1149 PetscErrorCode ierr; 1150 1151 PetscFunctionBegin; 1152 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1153 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1154 PetscFunctionReturn(0); 1155 } 1156 1157 #undef __FUNCT__ 1158 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ" 1159 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A) 1160 { 1161 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1162 PetscErrorCode ierr; 1163 1164 PetscFunctionBegin; 1165 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1166 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1167 PetscFunctionReturn(0); 1168 } 1169 1170 #undef __FUNCT__ 1171 #define __FUNCT__ "MatZeroEntries_MPISBAIJ" 1172 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A) 1173 { 1174 Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data; 1175 PetscErrorCode ierr; 1176 1177 PetscFunctionBegin; 1178 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1179 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1180 PetscFunctionReturn(0); 1181 } 1182 1183 #undef __FUNCT__ 1184 #define __FUNCT__ "MatGetInfo_MPISBAIJ" 1185 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1186 { 1187 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data; 1188 Mat A = a->A,B = a->B; 1189 PetscErrorCode ierr; 1190 PetscReal isend[5],irecv[5]; 1191 1192 PetscFunctionBegin; 1193 info->block_size = (PetscReal)matin->rmap.bs; 1194 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1195 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1196 isend[3] = info->memory; isend[4] = info->mallocs; 1197 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1198 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1199 isend[3] += info->memory; isend[4] += info->mallocs; 1200 if (flag == MAT_LOCAL) { 1201 info->nz_used = isend[0]; 1202 info->nz_allocated = isend[1]; 1203 info->nz_unneeded = isend[2]; 1204 info->memory = isend[3]; 1205 info->mallocs = isend[4]; 1206 } else if (flag == MAT_GLOBAL_MAX) { 1207 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1208 info->nz_used = irecv[0]; 1209 
info->nz_allocated = irecv[1]; 1210 info->nz_unneeded = irecv[2]; 1211 info->memory = irecv[3]; 1212 info->mallocs = irecv[4]; 1213 } else if (flag == MAT_GLOBAL_SUM) { 1214 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1215 info->nz_used = irecv[0]; 1216 info->nz_allocated = irecv[1]; 1217 info->nz_unneeded = irecv[2]; 1218 info->memory = irecv[3]; 1219 info->mallocs = irecv[4]; 1220 } else { 1221 SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1222 } 1223 info->rows_global = (PetscReal)A->rmap.N; 1224 info->columns_global = (PetscReal)A->cmap.N; 1225 info->rows_local = (PetscReal)A->rmap.N; 1226 info->columns_local = (PetscReal)A->cmap.N; 1227 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1228 info->fill_ratio_needed = 0; 1229 info->factor_mallocs = 0; 1230 PetscFunctionReturn(0); 1231 } 1232 1233 #undef __FUNCT__ 1234 #define __FUNCT__ "MatSetOption_MPISBAIJ" 1235 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op) 1236 { 1237 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1238 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1239 PetscErrorCode ierr; 1240 1241 PetscFunctionBegin; 1242 switch (op) { 1243 case MAT_NO_NEW_NONZERO_LOCATIONS: 1244 case MAT_YES_NEW_NONZERO_LOCATIONS: 1245 case MAT_COLUMNS_UNSORTED: 1246 case MAT_COLUMNS_SORTED: 1247 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1248 case MAT_KEEP_ZEROED_ROWS: 1249 case MAT_NEW_NONZERO_LOCATION_ERR: 1250 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1251 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1252 break; 1253 case MAT_ROW_ORIENTED: 1254 a->roworiented = PETSC_TRUE; 1255 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1256 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1257 break; 1258 case MAT_ROWS_SORTED: 1259 case MAT_ROWS_UNSORTED: 1260 case MAT_YES_NEW_DIAGONALS: 1261 ierr = PetscInfo1(A,"Option %d ignored\n",op);CHKERRQ(ierr); 1262 break; 1263 case MAT_COLUMN_ORIENTED: 1264 a->roworiented = PETSC_FALSE; 1265 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1266 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1267 break; 1268 case MAT_IGNORE_OFF_PROC_ENTRIES: 1269 a->donotstash = PETSC_TRUE; 1270 break; 1271 case MAT_NO_NEW_DIAGONALS: 1272 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1273 case MAT_USE_HASH_TABLE: 1274 a->ht_flag = PETSC_TRUE; 1275 break; 1276 case MAT_NOT_SYMMETRIC: 1277 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 1278 case MAT_HERMITIAN: 1279 SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric"); 1280 case MAT_SYMMETRIC: 1281 case MAT_STRUCTURALLY_SYMMETRIC: 1282 case MAT_NOT_HERMITIAN: 1283 case MAT_SYMMETRY_ETERNAL: 1284 case MAT_NOT_SYMMETRY_ETERNAL: 1285 break; 1286 case MAT_IGNORE_LOWER_TRIANGULAR: 1287 aA->ignore_ltriangular = PETSC_TRUE; 1288 break; 1289 case MAT_ERROR_LOWER_TRIANGULAR: 1290 aA->ignore_ltriangular = PETSC_FALSE; 1291 break; 1292 case MAT_GETROW_UPPERTRIANGULAR: 1293 aA->getrow_utriangular = PETSC_TRUE; 1294 break; 1295 default: 1296 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 1297 } 1298 PetscFunctionReturn(0); 1299 } 1300 1301 #undef __FUNCT__ 1302 #define __FUNCT__ "MatTranspose_MPISBAIJ" 1303 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,Mat *B) 1304 { 1305 PetscErrorCode ierr; 1306 PetscFunctionBegin; 1307 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 1308 PetscFunctionReturn(0); 1309 } 1310 1311 #undef __FUNCT__ 1312 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ" 1313 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr) 1314 { 1315 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1316 Mat a=baij->A, b=baij->B; 
1317 PetscErrorCode ierr; 1318 PetscInt nv,m,n; 1319 PetscTruth flg; 1320 1321 PetscFunctionBegin; 1322 if (ll != rr){ 1323 ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr); 1324 if (!flg) 1325 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n"); 1326 } 1327 if (!ll) PetscFunctionReturn(0); 1328 1329 ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr); 1330 if (m != n) SETERRQ2(PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n); 1331 1332 ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr); 1333 if (nv!=n) SETERRQ(PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size"); 1334 1335 ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1336 1337 /* left diagonalscale the off-diagonal part */ 1338 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1339 1340 /* scale the diagonal part */ 1341 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1342 1343 /* right diagonalscale the off-diagonal part */ 1344 ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1345 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1346 PetscFunctionReturn(0); 1347 } 1348 1349 #undef __FUNCT__ 1350 #define __FUNCT__ "MatPrintHelp_MPISBAIJ" 1351 PetscErrorCode MatPrintHelp_MPISBAIJ(Mat A) 1352 { 1353 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1354 MPI_Comm comm = A->comm; 1355 static PetscTruth called = PETSC_FALSE; 1356 PetscErrorCode ierr; 1357 1358 PetscFunctionBegin; 1359 if (!a->rank) { 1360 ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr); 1361 } 1362 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1363 ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr); 1364 ierr = (*PetscHelpPrintf)(comm," -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr); 1365 PetscFunctionReturn(0); 1366 } 1367 1368 #undef __FUNCT__ 1369 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ" 1370 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A) 1371 { 1372 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1373 PetscErrorCode ierr; 1374 1375 PetscFunctionBegin; 1376 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1377 PetscFunctionReturn(0); 1378 } 1379 1380 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *); 1381 1382 #undef __FUNCT__ 1383 #define __FUNCT__ "MatEqual_MPISBAIJ" 1384 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag) 1385 { 1386 Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data; 1387 Mat a,b,c,d; 1388 PetscTruth flg; 1389 PetscErrorCode ierr; 1390 1391 PetscFunctionBegin; 1392 a = matA->A; b = matA->B; 1393 c = matB->A; d = matB->B; 1394 1395 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1396 if (flg) { 1397 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1398 } 1399 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1400 PetscFunctionReturn(0); 1401 } 1402 1403 #undef __FUNCT__ 1404 #define __FUNCT__ "MatCopy_MPISBAIJ" 1405 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str) 1406 { 1407 PetscErrorCode ierr; 1408 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1409 Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data; 1410 1411 PetscFunctionBegin; 1412 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
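     In that case the code falls back to MatCopy_Basic(), which copies row by row with
     MatGetRow(); since MatGetRow() on an SBAIJ matrix returns only the stored upper
     triangle and must first be enabled with MatGetRowUpperTriangular(), the fallback
     below is bracketed by MatGetRowUpperTriangular()/MatRestoreRowUpperTriangular().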
*/ 1413 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1414 ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr); 1415 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1416 ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr); 1417 } else { 1418 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1419 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1420 } 1421 PetscFunctionReturn(0); 1422 } 1423 1424 #undef __FUNCT__ 1425 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ" 1426 PetscErrorCode MatSetUpPreallocation_MPISBAIJ(Mat A) 1427 { 1428 PetscErrorCode ierr; 1429 1430 PetscFunctionBegin; 1431 ierr = MatMPISBAIJSetPreallocation(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1432 PetscFunctionReturn(0); 1433 } 1434 1435 #include "petscblaslapack.h" 1436 #undef __FUNCT__ 1437 #define __FUNCT__ "MatAXPY_MPISBAIJ" 1438 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1439 { 1440 PetscErrorCode ierr; 1441 Mat_MPISBAIJ *xx=(Mat_MPISBAIJ *)X->data,*yy=(Mat_MPISBAIJ *)Y->data; 1442 PetscBLASInt bnz,one=1; 1443 Mat_SeqSBAIJ *xa,*ya; 1444 Mat_SeqBAIJ *xb,*yb; 1445 1446 PetscFunctionBegin; 1447 if (str == SAME_NONZERO_PATTERN) { 1448 PetscScalar alpha = a; 1449 xa = (Mat_SeqSBAIJ *)xx->A->data; 1450 ya = (Mat_SeqSBAIJ *)yy->A->data; 1451 bnz = (PetscBLASInt)xa->nz; 1452 BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one); 1453 xb = (Mat_SeqBAIJ *)xx->B->data; 1454 yb = (Mat_SeqBAIJ *)yy->B->data; 1455 bnz = (PetscBLASInt)xb->nz; 1456 BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one); 1457 } else { 1458 ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr); 1459 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1460 ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr); 1461 } 1462 PetscFunctionReturn(0); 1463 } 1464 1465 #undef __FUNCT__ 1466 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ" 1467 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[]) 1468 { 1469 PetscErrorCode ierr; 1470 PetscInt i; 1471 PetscTruth flg; 1472 1473 PetscFunctionBegin; 1474 for (i=0; i<n; i++) { 1475 ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr); 1476 if (!flg) { 1477 SETERRQ(PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices"); 1478 } 1479 } 1480 ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr); 1481 PetscFunctionReturn(0); 1482 } 1483 1484 1485 /* -------------------------------------------------------------------*/ 1486 static struct _MatOps MatOps_Values = { 1487 MatSetValues_MPISBAIJ, 1488 MatGetRow_MPISBAIJ, 1489 MatRestoreRow_MPISBAIJ, 1490 MatMult_MPISBAIJ, 1491 /* 4*/ MatMultAdd_MPISBAIJ, 1492 MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */ 1493 MatMultAdd_MPISBAIJ, 1494 0, 1495 0, 1496 0, 1497 /*10*/ 0, 1498 0, 1499 0, 1500 MatRelax_MPISBAIJ, 1501 MatTranspose_MPISBAIJ, 1502 /*15*/ MatGetInfo_MPISBAIJ, 1503 MatEqual_MPISBAIJ, 1504 MatGetDiagonal_MPISBAIJ, 1505 MatDiagonalScale_MPISBAIJ, 1506 MatNorm_MPISBAIJ, 1507 /*20*/ MatAssemblyBegin_MPISBAIJ, 1508 MatAssemblyEnd_MPISBAIJ, 1509 0, 1510 MatSetOption_MPISBAIJ, 1511 MatZeroEntries_MPISBAIJ, 1512 /*25*/ 0, 1513 0, 1514 0, 1515 0, 1516 0, 1517 /*30*/ MatSetUpPreallocation_MPISBAIJ, 1518 0, 1519 0, 1520 0, 1521 0, 1522 /*35*/ MatDuplicate_MPISBAIJ, 1523 0, 1524 0, 1525 0, 1526 0, 1527 /*40*/ MatAXPY_MPISBAIJ, 1528 MatGetSubMatrices_MPISBAIJ, 1529 MatIncreaseOverlap_MPISBAIJ, 1530 MatGetValues_MPISBAIJ, 1531 MatCopy_MPISBAIJ, 1532 /*45*/ MatPrintHelp_MPISBAIJ, 1533 MatScale_MPISBAIJ, 1534 0, 1535 0, 
1536 0, 1537 /*50*/ 0, 1538 0, 1539 0, 1540 0, 1541 0, 1542 /*55*/ 0, 1543 0, 1544 MatSetUnfactored_MPISBAIJ, 1545 0, 1546 MatSetValuesBlocked_MPISBAIJ, 1547 /*60*/ 0, 1548 0, 1549 0, 1550 0, 1551 0, 1552 /*65*/ 0, 1553 0, 1554 0, 1555 0, 1556 0, 1557 /*70*/ MatGetRowMax_MPISBAIJ, 1558 0, 1559 0, 1560 0, 1561 0, 1562 /*75*/ 0, 1563 0, 1564 0, 1565 0, 1566 0, 1567 /*80*/ 0, 1568 0, 1569 0, 1570 0, 1571 MatLoad_MPISBAIJ, 1572 /*85*/ 0, 1573 0, 1574 0, 1575 0, 1576 0, 1577 /*90*/ 0, 1578 0, 1579 0, 1580 0, 1581 0, 1582 /*95*/ 0, 1583 0, 1584 0, 1585 0, 1586 0, 1587 /*100*/0, 1588 0, 1589 0, 1590 0, 1591 0, 1592 /*105*/0, 1593 MatRealPart_MPISBAIJ, 1594 MatImaginaryPart_MPISBAIJ, 1595 MatGetRowUpperTriangular_MPISBAIJ, 1596 MatRestoreRowUpperTriangular_MPISBAIJ 1597 }; 1598 1599 1600 EXTERN_C_BEGIN 1601 #undef __FUNCT__ 1602 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ" 1603 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 1604 { 1605 PetscFunctionBegin; 1606 *a = ((Mat_MPISBAIJ *)A->data)->A; 1607 *iscopy = PETSC_FALSE; 1608 PetscFunctionReturn(0); 1609 } 1610 EXTERN_C_END 1611 1612 EXTERN_C_BEGIN 1613 #undef __FUNCT__ 1614 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ" 1615 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz) 1616 { 1617 Mat_MPISBAIJ *b; 1618 PetscErrorCode ierr; 1619 PetscInt i,mbs,Mbs; 1620 1621 PetscFunctionBegin; 1622 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1623 1624 if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive"); 1625 if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3; 1626 if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1; 1627 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 1628 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 1629 1630 B->rmap.bs = B->cmap.bs = bs; 1631 ierr = PetscMapInitialize(B->comm,&B->rmap);CHKERRQ(ierr); 1632 ierr = PetscMapInitialize(B->comm,&B->cmap);CHKERRQ(ierr); 1633 1634 if (d_nnz) { 1635 for (i=0; i<B->rmap.n/bs; i++) { 1636 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 1637 } 1638 } 1639 if (o_nnz) { 1640 for (i=0; i<B->rmap.n/bs; i++) { 1641 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 1642 } 1643 } 1644 B->preallocated = PETSC_TRUE; 1645 1646 b = (Mat_MPISBAIJ*)B->data; 1647 mbs = B->rmap.n/bs; 1648 Mbs = B->rmap.N/bs; 1649 if (mbs*bs != B->rmap.n) { 1650 SETERRQ2(PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap.N,bs); 1651 } 1652 1653 B->rmap.bs = bs; 1654 b->bs2 = bs*bs; 1655 b->mbs = mbs; 1656 b->nbs = mbs; 1657 b->Mbs = Mbs; 1658 b->Nbs = Mbs; 1659 1660 for (i=0; i<=b->size; i++) { 1661 b->rangebs[i] = B->rmap.range[i]/bs; 1662 } 1663 b->rstartbs = B->rmap.rstart/bs; 1664 b->rendbs = B->rmap.rend/bs; 1665 1666 b->cstartbs = B->cmap.rstart/bs; 1667 b->cendbs = B->cmap.rend/bs; 1668 1669 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 1670 ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr); 1671 ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr); 1672 ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 1673 ierr = 
PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 1674 1675 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 1676 ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr); 1677 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 1678 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 1679 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 1680 1681 /* build cache for off array entries formed */ 1682 ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr); 1683 1684 PetscFunctionReturn(0); 1685 } 1686 EXTERN_C_END 1687 1688 /*MC 1689 MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices, 1690 based on block compressed sparse row format. Only the upper triangular portion of the matrix is stored. 1691 1692 Options Database Keys: 1693 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions() 1694 1695 Level: beginner 1696 1697 .seealso: MatCreateMPISBAIJ 1698 M*/ 1699 1700 EXTERN_C_BEGIN 1701 #undef __FUNCT__ 1702 #define __FUNCT__ "MatCreate_MPISBAIJ" 1703 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPISBAIJ(Mat B) 1704 { 1705 Mat_MPISBAIJ *b; 1706 PetscErrorCode ierr; 1707 PetscTruth flg; 1708 1709 PetscFunctionBegin; 1710 1711 ierr = PetscNew(Mat_MPISBAIJ,&b);CHKERRQ(ierr); 1712 B->data = (void*)b; 1713 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1714 1715 B->ops->destroy = MatDestroy_MPISBAIJ; 1716 B->ops->view = MatView_MPISBAIJ; 1717 B->mapping = 0; 1718 B->factor = 0; 1719 B->assembled = PETSC_FALSE; 1720 1721 B->insertmode = NOT_SET_VALUES; 1722 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 1723 ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr); 1724 1725 /* build local table of row and column ownerships */ 1726 ierr = PetscMalloc((b->size+2)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr); 1727 1728 /* build cache for off array entries formed */ 1729 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 1730 b->donotstash = PETSC_FALSE; 1731 b->colmap = PETSC_NULL; 1732 b->garray = PETSC_NULL; 1733 b->roworiented = PETSC_TRUE; 1734 1735 #if defined(PETSC_USE_MAT_SINGLE) 1736 /* stuff for MatSetValues_XXX in single precision */ 1737 b->setvalueslen = 0; 1738 b->setvaluescopy = PETSC_NULL; 1739 #endif 1740 1741 /* stuff used in block assembly */ 1742 b->barray = 0; 1743 1744 /* stuff used for matrix vector multiply */ 1745 b->lvec = 0; 1746 b->Mvctx = 0; 1747 b->slvec0 = 0; 1748 b->slvec0b = 0; 1749 b->slvec1 = 0; 1750 b->slvec1a = 0; 1751 b->slvec1b = 0; 1752 b->sMvctx = 0; 1753 1754 /* stuff for MatGetRow() */ 1755 b->rowindices = 0; 1756 b->rowvalues = 0; 1757 b->getrowactive = PETSC_FALSE; 1758 1759 /* hash table stuff */ 1760 b->ht = 0; 1761 b->hd = 0; 1762 b->ht_size = 0; 1763 b->ht_flag = PETSC_FALSE; 1764 b->ht_fact = 0; 1765 b->ht_total_ct = 0; 1766 b->ht_insert_ct = 0; 1767 1768 b->in_loc = 0; 1769 b->v_loc = 0; 1770 b->n_loc = 0; 1771 ierr = PetscOptionsHasName(B->prefix,"-mat_use_hash_table",&flg);CHKERRQ(ierr); 1772 if (flg) { 1773 PetscReal fact = 1.39; 1774 ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr); 1775 ierr = PetscOptionsGetReal(B->prefix,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr); 1776 if (fact <= 1.0) fact = 1.39; 1777 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 1778 ierr = PetscInfo1(0,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 1779 } 1780 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1781 
"MatStoreValues_MPISBAIJ", 1782 MatStoreValues_MPISBAIJ);CHKERRQ(ierr); 1783 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1784 "MatRetrieveValues_MPISBAIJ", 1785 MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr); 1786 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 1787 "MatGetDiagonalBlock_MPISBAIJ", 1788 MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr); 1789 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C", 1790 "MatMPISBAIJSetPreallocation_MPISBAIJ", 1791 MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr); 1792 B->symmetric = PETSC_TRUE; 1793 B->structurally_symmetric = PETSC_TRUE; 1794 B->symmetric_set = PETSC_TRUE; 1795 B->structurally_symmetric_set = PETSC_TRUE; 1796 PetscFunctionReturn(0); 1797 } 1798 EXTERN_C_END 1799 1800 /*MC 1801 MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices. 1802 1803 This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator, 1804 and MATMPISBAIJ otherwise. 1805 1806 Options Database Keys: 1807 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions() 1808 1809 Level: beginner 1810 1811 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ 1812 M*/ 1813 1814 EXTERN_C_BEGIN 1815 #undef __FUNCT__ 1816 #define __FUNCT__ "MatCreate_SBAIJ" 1817 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SBAIJ(Mat A) 1818 { 1819 PetscErrorCode ierr; 1820 PetscMPIInt size; 1821 1822 PetscFunctionBegin; 1823 ierr = PetscObjectChangeTypeName((PetscObject)A,MATSBAIJ);CHKERRQ(ierr); 1824 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 1825 if (size == 1) { 1826 ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr); 1827 } else { 1828 ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 1829 } 1830 PetscFunctionReturn(0); 1831 } 1832 EXTERN_C_END 1833 1834 #undef __FUNCT__ 1835 #define __FUNCT__ "MatMPISBAIJSetPreallocation" 1836 /*@C 1837 MatMPISBAIJSetPreallocation - For good matrix assembly performance 1838 the user should preallocate the matrix storage by setting the parameters 1839 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1840 performance can be increased by more than a factor of 50. 1841 1842 Collective on Mat 1843 1844 Input Parameters: 1845 + A - the matrix 1846 . bs - size of blockk 1847 . d_nz - number of block nonzeros per block row in diagonal portion of local 1848 submatrix (same for all local rows) 1849 . d_nnz - array containing the number of block nonzeros in the various block rows 1850 in the upper triangular and diagonal part of the in diagonal portion of the local 1851 (possibly different for each block row) or PETSC_NULL. You must leave room 1852 for the diagonal entry even if it is zero. 1853 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1854 submatrix (same for all local rows). 1855 - o_nnz - array containing the number of nonzeros in the various block rows of the 1856 off-diagonal portion of the local submatrix (possibly different for 1857 each block row) or PETSC_NULL. 1858 1859 1860 Options Database Keys: 1861 . -mat_no_unroll - uses code that does not unroll the loops in the 1862 block calculations (much slower) 1863 . -mat_block_size - size of the blocks to use 1864 1865 Notes: 1866 1867 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1868 than it must be used on all processors that share the object for that argument. 
   If the *_nnz parameter is given then the *_nz parameter is ignored.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both). Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation. Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
            0 1 2 3 4 5 6 7 8 9 10 11
           -------------------
   row 3  | o o o d d d o o o o o o
   row 4  | o o o d d d o o o o o o
   row 5  | o o o d d d o o o o o o
           -------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix. Note that the d matrix is stored in
   MATSEQSBAIJ format and the o submatrix in MATSEQBAIJ format.

   Now d_nz should indicate the number of block nonzeros per row in the upper triangular
   plus the diagonal part of the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz. For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

   Level: intermediate

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPISBAIJ"
/*@C
   MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
   (block compressed row). For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs - size of block
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
      This value should be the same as the local size used in creating the
      y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
      This value should be the same as the local size used in creating the
      x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz - number of block nonzeros per block row in the diagonal portion of the local
          submatrix (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the upper triangular portion
           of the various block rows of the diagonal portion of the local submatrix
           (possibly different for each block row) or PETSC_NULL.
           You must leave room for the diagonal entry even if it is zero.
.  o_nz - number of block nonzeros per block row in the off-diagonal portion of the local
          submatrix (same for all local rows).
-  o_nnz - array containing the number of block nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or PETSC_NULL.

   Output Parameter:
.  A - the matrix

   Options Database Keys:
.   -mat_no_unroll - uses code that does not unroll the loops in the
                     block calculations (much slower)
.   -mat_block_size - size of the blocks to use
.   -mat_mpi - use the parallel matrix data structures even on one processor
               (defaults to using SeqBAIJ format on one processor)

   Notes:
   The number of rows and columns must be divisible by the blocksize.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   If the *_nnz parameter is given then the *_nz parameter is ignored.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both). Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation. Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
            0 1 2 3 4 5 6 7 8 9 10 11
           -------------------
   row 3  | o o o d d d o o o o o o
   row 4  | o o o d d d o o o o o o
   row 5  | o o o d d d o o o o o o
           -------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix. Note that the d matrix is stored in
   MATSEQSBAIJ format and the o submatrix in MATSEQBAIJ format.

   Now d_nz should indicate the number of block nonzeros per row in the upper triangular
   plus the diagonal part of the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
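   As an illustrative sketch only (the block size, sizes and nonzero counts are
   assumptions, not values from this file), a symmetric blocked matrix with block size 2
   and three local block rows per process could be created directly with

.vb
      Mat      A;
      PetscInt d_nnz[3] = {2,1,1},o_nnz[3] = {1,1,0};

      MatCreateMPISBAIJ(PETSC_COMM_WORLD,2,6,6,PETSC_DETERMINE,PETSC_DETERMINE,
                        0,d_nnz,0,o_nnz,&A);
.ve

   after which only the upper triangular portion of the matrix is stored, so typically
   only those entries are set.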
2014 In general, for PDE problems in which most nonzeros are near the diagonal, 2015 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2016 or you will get TERRIBLE performance; see the users' manual chapter on 2017 matrices. 2018 2019 Level: intermediate 2020 2021 .keywords: matrix, block, aij, compressed row, sparse, parallel 2022 2023 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2024 @*/ 2025 2026 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPISBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2027 { 2028 PetscErrorCode ierr; 2029 PetscMPIInt size; 2030 2031 PetscFunctionBegin; 2032 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2033 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2034 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2035 if (size > 1) { 2036 ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr); 2037 ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2038 } else { 2039 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 2040 ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2041 } 2042 PetscFunctionReturn(0); 2043 } 2044 2045 2046 #undef __FUNCT__ 2047 #define __FUNCT__ "MatDuplicate_MPISBAIJ" 2048 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2049 { 2050 Mat mat; 2051 Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data; 2052 PetscErrorCode ierr; 2053 PetscInt len=0,nt,bs=matin->rmap.bs,mbs=oldmat->mbs; 2054 PetscScalar *array; 2055 2056 PetscFunctionBegin; 2057 *newmat = 0; 2058 ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr); 2059 ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr); 2060 ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 2061 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2062 ierr = PetscMapCopy(matin->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr); 2063 ierr = PetscMapCopy(matin->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr); 2064 2065 mat->factor = matin->factor; 2066 mat->preallocated = PETSC_TRUE; 2067 mat->assembled = PETSC_TRUE; 2068 mat->insertmode = NOT_SET_VALUES; 2069 2070 a = (Mat_MPISBAIJ*)mat->data; 2071 a->bs2 = oldmat->bs2; 2072 a->mbs = oldmat->mbs; 2073 a->nbs = oldmat->nbs; 2074 a->Mbs = oldmat->Mbs; 2075 a->Nbs = oldmat->Nbs; 2076 2077 2078 a->size = oldmat->size; 2079 a->rank = oldmat->rank; 2080 a->donotstash = oldmat->donotstash; 2081 a->roworiented = oldmat->roworiented; 2082 a->rowindices = 0; 2083 a->rowvalues = 0; 2084 a->getrowactive = PETSC_FALSE; 2085 a->barray = 0; 2086 a->rstartbs = oldmat->rstartbs; 2087 a->rendbs = oldmat->rendbs; 2088 a->cstartbs = oldmat->cstartbs; 2089 a->cendbs = oldmat->cendbs; 2090 2091 /* hash table stuff */ 2092 a->ht = 0; 2093 a->hd = 0; 2094 a->ht_size = 0; 2095 a->ht_flag = oldmat->ht_flag; 2096 a->ht_fact = oldmat->ht_fact; 2097 a->ht_total_ct = 0; 2098 a->ht_insert_ct = 0; 2099 2100 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr); 2101 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 2102 ierr = MatStashCreate_Private(matin->comm,matin->rmap.bs,&mat->bstash);CHKERRQ(ierr); 2103 if (oldmat->colmap) { 2104 #if defined (PETSC_USE_CTABLE) 2105 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2106 #else 2107 ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2108 ierr = 
PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;

  if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
    ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
  } else a->garray = 0;

  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);

  ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
  ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);

  ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
  ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
  ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->slvec0b);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->slvec1a);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->slvec1b);CHKERRQ(ierr);

  /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
  ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
  a->sMvctx = oldmat->sMvctx;
  ierr = PetscLogObjectParent(mat,a->sMvctx);CHKERRQ(ierr);

  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
  /* duplicate the composed-function list of the original matrix into the new one */
  ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "petscsys.h"

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPISBAIJ"
PetscErrorCode MatLoad_MPISBAIJ(PetscViewer viewer, MatType type,Mat *newmat)
{
  Mat            A;
  PetscErrorCode ierr;
  PetscInt       i,nz,j,rstart,rend;
  PetscScalar    *vals,*buf;
  MPI_Comm       comm = ((PetscObject)viewer)->comm;
  MPI_Status     status;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,*locrowlens;
  PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
  PetscInt       *procsnz = 0,jj,*mycols,*ibuf;
  PetscInt       bs=1,Mbs,mbs,extra_rows;
  PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
  PetscInt       dcount,kmax,k,nzcount,tmp;
  int            fd;

  PetscFunctionBegin;
  ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);

  ierr =
MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2181 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2182 if (!rank) { 2183 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2184 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2185 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2186 if (header[3] < 0) { 2187 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ"); 2188 } 2189 } 2190 2191 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2192 M = header[1]; N = header[2]; 2193 2194 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2195 2196 /* 2197 This code adds extra rows to make sure the number of rows is 2198 divisible by the blocksize 2199 */ 2200 Mbs = M/bs; 2201 extra_rows = bs - M + bs*(Mbs); 2202 if (extra_rows == bs) extra_rows = 0; 2203 else Mbs++; 2204 if (extra_rows &&!rank) { 2205 ierr = PetscInfo(0,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 2206 } 2207 2208 /* determine ownership of all rows */ 2209 mbs = Mbs/size + ((Mbs % size) > rank); 2210 m = mbs*bs; 2211 ierr = PetscMalloc(2*(size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr); 2212 browners = rowners + size + 1; 2213 ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2214 rowners[0] = 0; 2215 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2216 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2217 rstart = rowners[rank]; 2218 rend = rowners[rank+1]; 2219 2220 /* distribute row lengths to all processors */ 2221 ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscMPIInt),&locrowlens);CHKERRQ(ierr); 2222 if (!rank) { 2223 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2224 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2225 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2226 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr); 2227 for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2228 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2229 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2230 } else { 2231 ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2232 } 2233 2234 if (!rank) { /* procs[0] */ 2235 /* calculate the number of nonzeros on each processor */ 2236 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2237 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2238 for (i=0; i<size; i++) { 2239 for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 2240 procsnz[i] += rowlengths[j]; 2241 } 2242 } 2243 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2244 2245 /* determine max buffer needed and allocate it */ 2246 maxnz = 0; 2247 for (i=0; i<size; i++) { 2248 maxnz = PetscMax(maxnz,procsnz[i]); 2249 } 2250 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2251 2252 /* read in my part of the matrix column indices */ 2253 nz = procsnz[0]; 2254 ierr = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 2255 mycols = ibuf; 2256 if (size == 1) nz -= extra_rows; 2257 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2258 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2259 2260 /* read in every ones (except the last) and ship off */ 2261 for (i=1; i<size-1; i++) { 2262 nz = procsnz[i]; 2263 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2264 ierr = 
MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2265 } 2266 /* read in the stuff for the last proc */ 2267 if (size != 1) { 2268 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2269 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2270 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2271 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 2272 } 2273 ierr = PetscFree(cols);CHKERRQ(ierr); 2274 } else { /* procs[i], i>0 */ 2275 /* determine buffer space needed for message */ 2276 nz = 0; 2277 for (i=0; i<m; i++) { 2278 nz += locrowlens[i]; 2279 } 2280 ierr = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 2281 mycols = ibuf; 2282 /* receive message of column indices*/ 2283 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2284 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2285 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2286 } 2287 2288 /* loop over local rows, determining number of off diagonal entries */ 2289 ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 2290 odlens = dlens + (rend-rstart); 2291 ierr = PetscMalloc(3*Mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr); 2292 ierr = PetscMemzero(mask,3*Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2293 masked1 = mask + Mbs; 2294 masked2 = masked1 + Mbs; 2295 rowcount = 0; nzcount = 0; 2296 for (i=0; i<mbs; i++) { 2297 dcount = 0; 2298 odcount = 0; 2299 for (j=0; j<bs; j++) { 2300 kmax = locrowlens[rowcount]; 2301 for (k=0; k<kmax; k++) { 2302 tmp = mycols[nzcount++]/bs; /* block col. index */ 2303 if (!mask[tmp]) { 2304 mask[tmp] = 1; 2305 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */ 2306 else masked1[dcount++] = tmp; /* entry in diag portion */ 2307 } 2308 } 2309 rowcount++; 2310 } 2311 2312 dlens[i] = dcount; /* d_nzz[i] */ 2313 odlens[i] = odcount; /* o_nzz[i] */ 2314 2315 /* zero out the mask elements we set */ 2316 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2317 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2318 } 2319 2320 /* create our matrix */ 2321 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 2322 ierr = MatSetSizes(A,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2323 ierr = MatSetType(A,type);CHKERRQ(ierr); 2324 ierr = MatMPISBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr); 2325 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2326 2327 if (!rank) { 2328 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2329 /* read in my part of the matrix numerical values */ 2330 nz = procsnz[0]; 2331 vals = buf; 2332 mycols = ibuf; 2333 if (size == 1) nz -= extra_rows; 2334 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2335 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2336 2337 /* insert into matrix */ 2338 jj = rstart*bs; 2339 for (i=0; i<m; i++) { 2340 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2341 mycols += locrowlens[i]; 2342 vals += locrowlens[i]; 2343 jj++; 2344 } 2345 2346 /* read in other processors (except the last one) and ship out */ 2347 for (i=1; i<size-1; i++) { 2348 nz = procsnz[i]; 2349 vals = buf; 2350 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2351 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2352 } 2353 /* the last proc */ 2354 if (size != 1){ 2355 nz = procsnz[i] - extra_rows; 2356 vals = buf; 2357 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 
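      /* pad the artificial rows (added so the global size is divisible by the block size)
         with a 1.0 on the diagonal; the matching column indices M+i were shipped above */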
2358 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2359 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 2360 } 2361 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2362 2363 } else { 2364 /* receive numeric values */ 2365 ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2366 2367 /* receive message of values*/ 2368 vals = buf; 2369 mycols = ibuf; 2370 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2371 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2372 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2373 2374 /* insert into matrix */ 2375 jj = rstart*bs; 2376 for (i=0; i<m; i++) { 2377 ierr = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2378 mycols += locrowlens[i]; 2379 vals += locrowlens[i]; 2380 jj++; 2381 } 2382 } 2383 2384 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2385 ierr = PetscFree(buf);CHKERRQ(ierr); 2386 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2387 ierr = PetscFree(rowners);CHKERRQ(ierr); 2388 ierr = PetscFree(dlens);CHKERRQ(ierr); 2389 ierr = PetscFree(mask);CHKERRQ(ierr); 2390 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2391 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2392 *newmat = A; 2393 PetscFunctionReturn(0); 2394 } 2395 2396 #undef __FUNCT__ 2397 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor" 2398 /*XXXXX@ 2399 MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2400 2401 Input Parameters: 2402 . mat - the matrix 2403 . fact - factor 2404 2405 Collective on Mat 2406 2407 Level: advanced 2408 2409 Notes: 2410 This can also be set by the command line option: -mat_use_hash_table fact 2411 2412 .keywords: matrix, hashtable, factor, HT 2413 2414 .seealso: MatSetOption() 2415 @XXXXX*/ 2416 2417 2418 #undef __FUNCT__ 2419 #define __FUNCT__ "MatGetRowMax_MPISBAIJ" 2420 PetscErrorCode MatGetRowMax_MPISBAIJ(Mat A,Vec v) 2421 { 2422 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 2423 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(a->B)->data; 2424 PetscReal atmp; 2425 PetscReal *work,*svalues,*rvalues; 2426 PetscErrorCode ierr; 2427 PetscInt i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol; 2428 PetscMPIInt rank,size; 2429 PetscInt *rowners_bs,dest,count,source; 2430 PetscScalar *va; 2431 MatScalar *ba; 2432 MPI_Status stat; 2433 2434 PetscFunctionBegin; 2435 ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr); 2436 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2437 2438 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 2439 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 2440 2441 bs = A->rmap.bs; 2442 mbs = a->mbs; 2443 Mbs = a->Mbs; 2444 ba = b->a; 2445 bi = b->i; 2446 bj = b->j; 2447 2448 /* find ownerships */ 2449 rowners_bs = A->rmap.range; 2450 2451 /* each proc creates an array to be distributed */ 2452 ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr); 2453 ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr); 2454 2455 /* row_max for B */ 2456 if (rank != size-1){ 2457 for (i=0; i<mbs; i++) { 2458 ncols = bi[1] - bi[0]; bi++; 2459 brow = bs*i; 2460 for (j=0; j<ncols; j++){ 2461 bcol = bs*(*bj); 2462 for (kcol=0; kcol<bs; kcol++){ 2463 col = bcol + kcol; /* local col index */ 2464 col += rowners_bs[rank+1]; /* global col index */ 2465 for (krow=0; krow<bs; krow++){ 2466 atmp = PetscAbsScalar(*ba); ba++; 2467 row = brow + krow; /* local row index */ 2468 if (PetscRealPart(va[row]) < atmp) va[row] = atmp; 2469 if (work[col] < atmp) 
work[col] = atmp; 2470 } 2471 } 2472 bj++; 2473 } 2474 } 2475 2476 /* send values to its owners */ 2477 for (dest=rank+1; dest<size; dest++){ 2478 svalues = work + rowners_bs[dest]; 2479 count = rowners_bs[dest+1]-rowners_bs[dest]; 2480 ierr = MPI_Send(svalues,count,MPIU_REAL,dest,rank,A->comm);CHKERRQ(ierr); 2481 } 2482 } 2483 2484 /* receive values */ 2485 if (rank){ 2486 rvalues = work; 2487 count = rowners_bs[rank+1]-rowners_bs[rank]; 2488 for (source=0; source<rank; source++){ 2489 ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,A->comm,&stat);CHKERRQ(ierr); 2490 /* process values */ 2491 for (i=0; i<count; i++){ 2492 if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i]; 2493 } 2494 } 2495 } 2496 2497 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2498 ierr = PetscFree(work);CHKERRQ(ierr); 2499 PetscFunctionReturn(0); 2500 } 2501 2502 #undef __FUNCT__ 2503 #define __FUNCT__ "MatRelax_MPISBAIJ" 2504 PetscErrorCode MatRelax_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2505 { 2506 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2507 PetscErrorCode ierr; 2508 PetscInt mbs=mat->mbs,bs=matin->rmap.bs; 2509 PetscScalar *x,*b,*ptr,zero=0.0; 2510 Vec bb1; 2511 2512 PetscFunctionBegin; 2513 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 2514 if (bs > 1) 2515 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2516 2517 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2518 if ( flag & SOR_ZERO_INITIAL_GUESS ) { 2519 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2520 its--; 2521 } 2522 2523 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2524 while (its--){ 2525 2526 /* lower triangular part: slvec0b = - B^T*xx */ 2527 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2528 2529 /* copy xx into slvec0a */ 2530 ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2531 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2532 ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2533 ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2534 2535 ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr); 2536 2537 /* copy bb into slvec1a */ 2538 ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2539 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 2540 ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2541 ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2542 2543 /* set slvec1b = 0 */ 2544 ierr = VecSet(mat->slvec1b,zero);CHKERRQ(ierr); 2545 2546 ierr = VecScatterBegin(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr); 2547 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2548 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 2549 ierr = VecScatterEnd(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr); 2550 2551 /* upper triangular part: bb1 = bb1 - B*x */ 2552 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr); 2553 2554 /* local diagonal sweep */ 2555 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2556 } 2557 ierr = VecDestroy(bb1);CHKERRQ(ierr); 2558 } else { 2559 SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2560 } 2561 PetscFunctionReturn(0); 2562 } 2563 2564 #undef __FUNCT__ 2565 #define __FUNCT__ "MatRelax_MPISBAIJ_2comm" 2566 PetscErrorCode 
MatRelax_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2567 { 2568 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2569 PetscErrorCode ierr; 2570 Vec lvec1,bb1; 2571 2572 PetscFunctionBegin; 2573 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 2574 if (matin->rmap.bs > 1) 2575 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2576 2577 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2578 if ( flag & SOR_ZERO_INITIAL_GUESS ) { 2579 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2580 its--; 2581 } 2582 2583 ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr); 2584 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2585 while (its--){ 2586 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 2587 2588 /* lower diagonal part: bb1 = bb - B^T*xx */ 2589 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr); 2590 ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr); 2591 2592 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 2593 ierr = VecCopy(bb,bb1);CHKERRQ(ierr); 2594 ierr = VecScatterBegin(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr); 2595 2596 /* upper diagonal part: bb1 = bb1 - B*x */ 2597 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2598 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 2599 2600 ierr = VecScatterEnd(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr); 2601 2602 /* diagonal sweep */ 2603 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2604 } 2605 ierr = VecDestroy(lvec1);CHKERRQ(ierr); 2606 ierr = VecDestroy(bb1);CHKERRQ(ierr); 2607 } else { 2608 SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2609 } 2610 PetscFunctionReturn(0); 2611 } 2612 2613
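
/*
   Illustrative sketch (not part of the library source): the relaxation routines above are
   reached through the generic MatRelax() interface, which dispatches to the ops->relax
   implementation. For example, two local symmetric SOR sweeps with omega = 1.0 on an
   assembled MPISBAIJ matrix A with block size 1 might look like (names and values here
   are assumptions for the example only):

      Vec x,b;
      ...
      MatRelax(A,b,1.0,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),0.0,2,1,x);
*/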