1 #define PETSCMAT_DLL 2 3 #include "src/mat/impls/baij/mpi/mpibaij.h" /*I "petscmat.h" I*/ 4 #include "mpisbaij.h" 5 #include "src/mat/impls/sbaij/seq/sbaij.h" 6 7 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat); 8 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat); 9 EXTERN PetscErrorCode DisAssemble_MPISBAIJ(Mat); 10 EXTERN PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt); 11 EXTERN PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []); 12 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []); 13 EXTERN PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode); 14 EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 15 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 16 EXTERN PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**); 17 EXTERN PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**); 18 EXTERN PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*); 19 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *); 20 EXTERN PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat,Vec,PetscInt[]); 21 EXTERN PetscErrorCode MatRelax_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec); 22 23 /* UGLY, ugly, ugly 24 When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does 25 not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and 26 inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ() 27 converts the entries into single precision and then calls ..._MatScalar() to put them 28 into the single precision data structures. 29 */ 30 #if defined(PETSC_USE_MAT_SINGLE) 31 EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode); 32 EXTERN PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode); 33 EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode); 34 EXTERN PetscErrorCode MatSetValues_MPISBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode); 35 EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode); 36 #else 37 #define MatSetValuesBlocked_SeqSBAIJ_MatScalar MatSetValuesBlocked_SeqSBAIJ 38 #define MatSetValues_MPISBAIJ_MatScalar MatSetValues_MPISBAIJ 39 #define MatSetValuesBlocked_MPISBAIJ_MatScalar MatSetValuesBlocked_MPISBAIJ 40 #define MatSetValues_MPISBAIJ_HT_MatScalar MatSetValues_MPISBAIJ_HT 41 #define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar MatSetValuesBlocked_MPISBAIJ_HT 42 #endif 43 44 EXTERN_C_BEGIN 45 #undef __FUNCT__ 46 #define __FUNCT__ "MatStoreValues_MPISBAIJ" 47 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPISBAIJ(Mat mat) 48 { 49 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 50 PetscErrorCode ierr; 51 52 PetscFunctionBegin; 53 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 54 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 55 PetscFunctionReturn(0); 56 } 57 EXTERN_C_END 58 59 EXTERN_C_BEGIN 60 #undef __FUNCT__ 61 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ" 62 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPISBAIJ(Mat mat) 63 { 64 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 65 PetscErrorCode ierr; 66 67 PetscFunctionBegin; 68 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 69 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 70 PetscFunctionReturn(0); 71 } 72 EXTERN_C_END 73 74 75 #define CHUNKSIZE 10 76 77 #define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \ 78 { \ 79 \ 80 brow = row/bs; \ 81 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 82 rmax = aimax[brow]; nrow = ailen[brow]; \ 83 bcol = col/bs; \ 84 ridx = row % bs; cidx = col % bs; \ 85 low = 0; high = nrow; \ 86 while (high-low > 3) { \ 87 t = (low+high)/2; \ 88 if (rp[t] > bcol) high = t; \ 89 else low = t; \ 90 } \ 91 for (_i=low; _i<high; _i++) { \ 92 if (rp[_i] > bcol) break; \ 93 if (rp[_i] == bcol) { \ 94 bap = ap + bs2*_i + bs*cidx + ridx; \ 95 if (addv == ADD_VALUES) *bap += value; \ 96 else *bap = value; \ 97 goto a_noinsert; \ 98 } \ 99 } \ 100 if (a->nonew == 1) goto a_noinsert; \ 101 if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 102 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 103 N = nrow++ - 1; \ 104 /* shift up all the later entries in this row */ \ 105 for (ii=N; ii>=_i; ii--) { \ 106 rp[ii+1] = rp[ii]; \ 107 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 108 } \ 109 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 110 rp[_i] = bcol; \ 111 ap[bs2*_i + bs*cidx + ridx] = value; \ 112 a_noinsert:; \ 113 ailen[brow] = nrow; \ 114 } 115 #ifndef MatSetValues_SeqBAIJ_B_Private 116 #define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \ 117 { \ 118 brow = row/bs; \ 119 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 120 rmax = bimax[brow]; nrow = bilen[brow]; \ 121 bcol = col/bs; \ 122 ridx = row % bs; cidx = col % bs; \ 123 low = 0; high = nrow; \ 124 while (high-low > 3) { \ 125 t = (low+high)/2; \ 126 if (rp[t] > bcol) high = t; \ 127 else low = t; \ 128 } \ 129 for (_i=low; _i<high; _i++) { \ 130 if (rp[_i] > bcol) break; \ 131 if (rp[_i] == bcol) { \ 132 bap = ap + bs2*_i + bs*cidx + ridx; \ 133 if (addv == ADD_VALUES) *bap += value; \ 134 else *bap = value; \ 135 goto b_noinsert; \ 136 } \ 137 } \ 138 if (b->nonew == 1) goto b_noinsert; \ 139 if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 140 MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 141 N = nrow++ - 1; \ 142 /* shift up all the later entries in this row */ \ 143 for (ii=N; ii>=_i; ii--) { \ 144 rp[ii+1] = rp[ii]; \ 145 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 146 } \ 147 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 148 rp[_i] = bcol; \ 149 ap[bs2*_i + bs*cidx + ridx] = value; \ 150 b_noinsert:; \ 151 bilen[brow] = nrow; \ 152 } 153 #endif 154 155 #if defined(PETSC_USE_MAT_SINGLE) 156 #undef __FUNCT__ 157 #define __FUNCT__ "MatSetValues_MPISBAIJ" 158 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 159 { 160 Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data; 161 PetscErrorCode ierr; 162 PetscInt i,N = m*n; 163 MatScalar *vsingle; 164 165 PetscFunctionBegin; 166 if (N > b->setvalueslen) { 167 ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr); 168 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 169 b->setvalueslen = N; 170 } 171 vsingle = b->setvaluescopy; 172 173 for (i=0; i<N; i++) { 174 vsingle[i] = v[i]; 175 } 176 ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 177 PetscFunctionReturn(0); 178 } 179 180 #undef __FUNCT__ 181 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ" 182 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 183 { 184 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 185 PetscErrorCode ierr; 186 PetscInt i,N = m*n*b->bs2; 187 MatScalar *vsingle; 188 189 PetscFunctionBegin; 190 if (N > b->setvalueslen) { 191 ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr); 192 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 193 b->setvalueslen = N; 194 } 195 vsingle = b->setvaluescopy; 196 for (i=0; i<N; i++) { 197 vsingle[i] = v[i]; 198 } 199 ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 200 PetscFunctionReturn(0); 201 } 202 #endif 203 204 /* Only add/insert a(i,j) with i<=j (blocks). 205 Any a(i,j) with i>j input by user is ingored. 206 */ 207 #undef __FUNCT__ 208 #define __FUNCT__ "MatSetValues_MPISBAIJ_MatScalar" 209 PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 210 { 211 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 212 MatScalar value; 213 PetscTruth roworiented = baij->roworiented; 214 PetscErrorCode ierr; 215 PetscInt i,j,row,col; 216 PetscInt rstart_orig=mat->rmap.rstart; 217 PetscInt rend_orig=mat->rmap.rend,cstart_orig=mat->cmap.rstart; 218 PetscInt cend_orig=mat->cmap.rend,bs=mat->rmap.bs; 219 220 /* Some Variables required in the macro */ 221 Mat A = baij->A; 222 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data; 223 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 224 MatScalar *aa=a->a; 225 226 Mat B = baij->B; 227 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 228 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 229 MatScalar *ba=b->a; 230 231 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 232 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 233 MatScalar *ap,*bap; 234 235 /* for stash */ 236 PetscInt n_loc, *in_loc = PETSC_NULL; 237 MatScalar *v_loc = PETSC_NULL; 238 239 PetscFunctionBegin; 240 241 if (!baij->donotstash){ 242 if (n > baij->n_loc) { 243 ierr = PetscFree(baij->in_loc);CHKERRQ(ierr); 244 ierr = PetscFree(baij->v_loc);CHKERRQ(ierr); 245 ierr = PetscMalloc(n*sizeof(PetscInt),&baij->in_loc);CHKERRQ(ierr); 246 ierr = PetscMalloc(n*sizeof(MatScalar),&baij->v_loc);CHKERRQ(ierr); 247 baij->n_loc = n; 248 } 249 in_loc = baij->in_loc; 250 v_loc = baij->v_loc; 251 } 252 253 for (i=0; i<m; i++) { 254 if (im[i] < 0) continue; 255 #if defined(PETSC_USE_DEBUG) 256 if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1); 257 #endif 258 if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */ 259 row = im[i] - rstart_orig; /* local row index */ 260 for (j=0; j<n; j++) { 261 if (im[i]/bs > in[j]/bs){ 262 if (a->ignore_ltriangular){ 263 continue; /* ignore lower triangular blocks */ 264 } else { 265 SETERRQ(PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 266 } 267 } 268 if (in[j] >= cstart_orig && in[j] < cend_orig){ /* diag entry (A) */ 269 col = in[j] - cstart_orig; /* local col index */ 270 brow = row/bs; bcol = col/bs; 271 if (brow > bcol) continue; /* ignore lower triangular blocks of A */ 272 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 273 MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv); 274 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 275 } else if (in[j] < 0) continue; 276 #if defined(PETSC_USE_DEBUG) 277 else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);} 278 #endif 279 else { /* off-diag entry (B) */ 280 if (mat->was_assembled) { 281 if (!baij->colmap) { 282 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 283 } 284 #if defined (PETSC_USE_CTABLE) 285 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 286 col = col - 1; 287 #else 288 col = baij->colmap[in[j]/bs] - 1; 289 #endif 290 if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) { 291 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 292 col = in[j]; 293 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 294 B = baij->B; 295 b = (Mat_SeqBAIJ*)(B)->data; 296 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 297 ba=b->a; 298 } else col += in[j]%bs; 299 } else col = in[j]; 300 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 301 MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv); 302 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 303 } 304 } 305 } else { /* off processor entry */ 306 if (!baij->donotstash) { 307 n_loc = 0; 308 for (j=0; j<n; j++){ 309 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 310 in_loc[n_loc] = in[j]; 311 if (roworiented) { 312 v_loc[n_loc] = v[i*n+j]; 313 } else { 314 v_loc[n_loc] = v[j*m+i]; 315 } 316 n_loc++; 317 } 318 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr); 319 } 320 } 321 } 322 PetscFunctionReturn(0); 323 } 324 325 #undef __FUNCT__ 326 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_MatScalar" 327 PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 328 { 329 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 330 const MatScalar *value; 331 MatScalar *barray=baij->barray; 332 PetscTruth roworiented = baij->roworiented; 333 PetscErrorCode ierr; 334 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 335 PetscInt rend=baij->rendbs,cstart=baij->rstartbs,stepval; 336 PetscInt cend=baij->rendbs,bs=mat->rmap.bs,bs2=baij->bs2; 337 338 PetscFunctionBegin; 339 if(!barray) { 340 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 341 baij->barray = barray; 342 } 343 344 if (roworiented) { 345 stepval = (n-1)*bs; 346 } else { 347 stepval = (m-1)*bs; 348 } 349 for (i=0; i<m; i++) { 350 if (im[i] < 0) continue; 351 #if defined(PETSC_USE_DEBUG) 352 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 353 #endif 354 if (im[i] >= rstart && im[i] < rend) { 355 row = im[i] - rstart; 356 for (j=0; j<n; j++) { 357 /* If NumCol = 1 then a copy is not required */ 358 if ((roworiented) && (n == 1)) { 359 barray = (MatScalar*) v + i*bs2; 360 } else if((!roworiented) && (m == 1)) { 361 barray = (MatScalar*) v + j*bs2; 362 } else { /* Here a copy is required */ 363 if (roworiented) { 364 value = v + i*(stepval+bs)*bs + j*bs; 365 } else { 366 value = v + j*(stepval+bs)*bs + i*bs; 367 } 368 for (ii=0; ii<bs; ii++,value+=stepval) { 369 for (jj=0; jj<bs; jj++) { 370 *barray++ = *value++; 371 } 372 } 373 barray -=bs2; 374 } 375 376 if (in[j] >= cstart && in[j] < cend){ 377 col = in[j] - cstart; 378 ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 379 } 380 else if (in[j] < 0) continue; 381 #if defined(PETSC_USE_DEBUG) 382 else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);} 383 #endif 384 else { 385 if (mat->was_assembled) { 386 if (!baij->colmap) { 387 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 388 } 389 390 #if defined(PETSC_USE_DEBUG) 391 #if defined (PETSC_USE_CTABLE) 392 { PetscInt data; 393 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 394 if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 395 } 396 #else 397 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 398 #endif 399 #endif 400 #if defined (PETSC_USE_CTABLE) 401 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 402 col = (col - 1)/bs; 403 #else 404 col = (baij->colmap[in[j]] - 1)/bs; 405 #endif 406 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 407 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 408 col = in[j]; 409 } 410 } 411 else col = in[j]; 412 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 413 } 414 } 415 } else { 416 if (!baij->donotstash) { 417 if (roworiented) { 418 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 419 } else { 420 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 421 } 422 } 423 } 424 } 425 PetscFunctionReturn(0); 426 } 427 428 #undef __FUNCT__ 429 #define __FUNCT__ "MatGetValues_MPISBAIJ" 430 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 431 { 432 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 433 PetscErrorCode ierr; 434 PetscInt bs=mat->rmap.bs,i,j,bsrstart = mat->rmap.rstart,bsrend = mat->rmap.rend; 435 PetscInt bscstart = mat->cmap.rstart,bscend = mat->cmap.rend,row,col,data; 436 437 PetscFunctionBegin; 438 for (i=0; i<m; i++) { 439 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */ 440 if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1); 441 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 442 row = idxm[i] - bsrstart; 443 for (j=0; j<n; j++) { 444 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */ 445 if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1); 446 if (idxn[j] >= bscstart && idxn[j] < bscend){ 447 col = idxn[j] - bscstart; 448 ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 449 } else { 450 if (!baij->colmap) { 451 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 452 } 453 #if defined (PETSC_USE_CTABLE) 454 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 455 data --; 456 #else 457 data = baij->colmap[idxn[j]/bs]-1; 458 #endif 459 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 460 else { 461 col = data + idxn[j]%bs; 462 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 463 } 464 } 465 } 466 } else { 467 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 468 } 469 } 470 PetscFunctionReturn(0); 471 } 472 473 #undef __FUNCT__ 474 #define __FUNCT__ "MatNorm_MPISBAIJ" 475 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm) 476 { 477 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 478 PetscErrorCode ierr; 479 PetscReal sum[2],*lnorm2; 480 481 PetscFunctionBegin; 482 if (baij->size == 1) { 483 ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr); 484 } else { 485 if (type == NORM_FROBENIUS) { 486 ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr); 487 ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr); 488 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++; /* squar power of norm(A) */ 489 ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr); 490 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--; /* squar power of norm(B) */ 491 ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 492 *norm = sqrt(sum[0] + 2*sum[1]); 493 ierr = PetscFree(lnorm2);CHKERRQ(ierr); 494 } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */ 495 Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data; 496 Mat_SeqBAIJ *bmat=(Mat_SeqBAIJ*)baij->B->data; 497 PetscReal *rsum,*rsum2,vabs; 498 PetscInt *jj,*garray=baij->garray,rstart=baij->rstartbs,nz; 499 PetscInt brow,bcol,col,bs=baij->A->rmap.bs,row,grow,gcol,mbs=amat->mbs; 500 MatScalar *v; 501 502 ierr = PetscMalloc((2*mat->cmap.N+1)*sizeof(PetscReal),&rsum);CHKERRQ(ierr); 503 rsum2 = rsum + mat->cmap.N; 504 ierr = PetscMemzero(rsum,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr); 505 /* Amat */ 506 v = amat->a; jj = amat->j; 507 for (brow=0; brow<mbs; brow++) { 508 grow = bs*(rstart + brow); 509 nz = amat->i[brow+1] - amat->i[brow]; 510 for (bcol=0; bcol<nz; bcol++){ 511 gcol = bs*(rstart + *jj); jj++; 512 for (col=0; col<bs; col++){ 513 for (row=0; row<bs; row++){ 514 vabs = PetscAbsScalar(*v); v++; 515 rsum[gcol+col] += vabs; 516 /* non-diagonal block */ 517 if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs; 518 } 519 } 520 } 521 } 522 /* Bmat */ 523 v = bmat->a; jj = bmat->j; 524 for (brow=0; brow<mbs; brow++) { 525 grow = bs*(rstart + brow); 526 nz = bmat->i[brow+1] - bmat->i[brow]; 527 for (bcol=0; bcol<nz; bcol++){ 528 gcol = bs*garray[*jj]; jj++; 529 for (col=0; col<bs; col++){ 530 for (row=0; row<bs; row++){ 531 vabs = PetscAbsScalar(*v); v++; 532 rsum[gcol+col] += vabs; 533 rsum[grow+row] += vabs; 534 } 535 } 536 } 537 } 538 ierr = MPI_Allreduce(rsum,rsum2,mat->cmap.N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 539 *norm = 0.0; 540 for (col=0; col<mat->cmap.N; col++) { 541 if (rsum2[col] > *norm) *norm = rsum2[col]; 542 } 543 ierr = PetscFree(rsum);CHKERRQ(ierr); 544 } else { 545 SETERRQ(PETSC_ERR_SUP,"No support for this norm yet"); 546 } 547 } 548 PetscFunctionReturn(0); 549 } 550 551 #undef __FUNCT__ 552 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ" 553 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode) 554 { 555 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 556 PetscErrorCode ierr; 557 PetscInt nstash,reallocs; 558 InsertMode addv; 559 560 PetscFunctionBegin; 561 if (baij->donotstash) { 562 PetscFunctionReturn(0); 563 } 564 565 /* make sure all processors are either in INSERTMODE or ADDMODE */ 566 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr); 567 if (addv == (ADD_VALUES|INSERT_VALUES)) { 568 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 569 } 570 mat->insertmode = addv; /* in case this processor had no cache */ 571 572 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap.range);CHKERRQ(ierr); 573 ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr); 574 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 575 ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 576 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 577 ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 578 PetscFunctionReturn(0); 579 } 580 581 #undef __FUNCT__ 582 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ" 583 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode) 584 { 585 Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data; 586 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)baij->A->data; 587 PetscErrorCode ierr; 588 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 589 PetscInt *row,*col,other_disassembled; 590 PetscMPIInt n; 591 PetscTruth r1,r2,r3; 592 MatScalar *val; 593 InsertMode addv = mat->insertmode; 594 595 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 596 PetscFunctionBegin; 597 598 if (!baij->donotstash) { 599 while (1) { 600 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 601 if (!flg) break; 602 603 for (i=0; i<n;) { 604 /* Now identify the consecutive vals belonging to the same row */ 605 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 606 if (j < n) ncols = j-i; 607 else ncols = n-i; 608 /* Now assemble all these values with a single function call */ 609 ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 610 i = j; 611 } 612 } 613 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 614 /* Now process the block-stash. Since the values are stashed column-oriented, 615 set the roworiented flag to column oriented, and after MatSetValues() 616 restore the original flags */ 617 r1 = baij->roworiented; 618 r2 = a->roworiented; 619 r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented; 620 baij->roworiented = PETSC_FALSE; 621 a->roworiented = PETSC_FALSE; 622 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworinted */ 623 while (1) { 624 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 625 if (!flg) break; 626 627 for (i=0; i<n;) { 628 /* Now identify the consecutive vals belonging to the same row */ 629 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 630 if (j < n) ncols = j-i; 631 else ncols = n-i; 632 ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 633 i = j; 634 } 635 } 636 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 637 baij->roworiented = r1; 638 a->roworiented = r2; 639 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworinted */ 640 } 641 642 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 643 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 644 645 /* determine if any processor has disassembled, if so we must 646 also disassemble ourselfs, in order that we may reassemble. */ 647 /* 648 if nonzero structure of submatrix B cannot change then we know that 649 no processor disassembled thus we can skip this stuff 650 */ 651 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 652 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr); 653 if (mat->was_assembled && !other_disassembled) { 654 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 655 } 656 } 657 658 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 659 ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */ 660 } 661 ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */ 662 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 663 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 664 665 ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr); 666 baij->rowvalues = 0; 667 668 PetscFunctionReturn(0); 669 } 670 671 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode); 672 #undef __FUNCT__ 673 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket" 674 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 675 { 676 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 677 PetscErrorCode ierr; 678 PetscInt bs = mat->rmap.bs; 679 PetscMPIInt size = baij->size,rank = baij->rank; 680 PetscTruth iascii,isdraw; 681 PetscViewer sviewer; 682 PetscViewerFormat format; 683 684 PetscFunctionBegin; 685 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 686 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 687 if (iascii) { 688 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 689 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 690 MatInfo info; 691 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 692 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 693 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n", 694 rank,mat->rmap.N,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs, 695 mat->rmap.bs,(PetscInt)info.memory);CHKERRQ(ierr); 696 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 697 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr); 698 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 699 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr); 700 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 701 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 702 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 703 PetscFunctionReturn(0); 704 } else if (format == PETSC_VIEWER_ASCII_INFO) { 705 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 706 PetscFunctionReturn(0); 707 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 708 PetscFunctionReturn(0); 709 } 710 } 711 712 if (isdraw) { 713 PetscDraw draw; 714 PetscTruth isnull; 715 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 716 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 717 } 718 719 if (size == 1) { 720 ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 721 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 722 } else { 723 /* assemble the entire matrix onto first processor. */ 724 Mat A; 725 Mat_SeqSBAIJ *Aloc; 726 Mat_SeqBAIJ *Bloc; 727 PetscInt M = mat->rmap.N,N = mat->cmap.N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 728 MatScalar *a; 729 730 /* Should this be the same type as mat? */ 731 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 732 if (!rank) { 733 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 734 } else { 735 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 736 } 737 ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 738 ierr = MatMPISBAIJSetPreallocation(A,mat->rmap.bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 739 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 740 741 /* copy over the A part */ 742 Aloc = (Mat_SeqSBAIJ*)baij->A->data; 743 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 744 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 745 746 for (i=0; i<mbs; i++) { 747 rvals[0] = bs*(baij->rstartbs + i); 748 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 749 for (j=ai[i]; j<ai[i+1]; j++) { 750 col = (baij->cstartbs+aj[j])*bs; 751 for (k=0; k<bs; k++) { 752 ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 753 col++; a += bs; 754 } 755 } 756 } 757 /* copy over the B part */ 758 Bloc = (Mat_SeqBAIJ*)baij->B->data; 759 ai = Bloc->i; aj = Bloc->j; a = Bloc->a; 760 for (i=0; i<mbs; i++) { 761 762 rvals[0] = bs*(baij->rstartbs + i); 763 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 764 for (j=ai[i]; j<ai[i+1]; j++) { 765 col = baij->garray[aj[j]]*bs; 766 for (k=0; k<bs; k++) { 767 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 768 col++; a += bs; 769 } 770 } 771 } 772 ierr = PetscFree(rvals);CHKERRQ(ierr); 773 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 774 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 775 /* 776 Everyone has to call to draw the matrix since the graphics waits are 777 synchronized across all processors that share the PetscDraw object 778 */ 779 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 780 if (!rank) { 781 ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 782 ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 783 } 784 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 785 ierr = MatDestroy(A);CHKERRQ(ierr); 786 } 787 PetscFunctionReturn(0); 788 } 789 790 #undef __FUNCT__ 791 #define __FUNCT__ "MatView_MPISBAIJ" 792 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer) 793 { 794 PetscErrorCode ierr; 795 PetscTruth iascii,isdraw,issocket,isbinary; 796 797 PetscFunctionBegin; 798 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 799 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 800 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 801 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 802 if (iascii || isdraw || issocket || isbinary) { 803 ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 804 } else { 805 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name); 806 } 807 PetscFunctionReturn(0); 808 } 809 810 #undef __FUNCT__ 811 #define __FUNCT__ "MatDestroy_MPISBAIJ" 812 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat) 813 { 814 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 815 PetscErrorCode ierr; 816 817 PetscFunctionBegin; 818 #if defined(PETSC_USE_LOG) 819 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap.N,mat->cmap.N); 820 #endif 821 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 822 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 823 ierr = MatDestroy(baij->A);CHKERRQ(ierr); 824 ierr = MatDestroy(baij->B);CHKERRQ(ierr); 825 #if defined (PETSC_USE_CTABLE) 826 if (baij->colmap) {ierr = PetscTableDestroy(baij->colmap);CHKERRQ(ierr);} 827 #else 828 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 829 #endif 830 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 831 if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 832 if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 833 if (baij->slvec0) { 834 ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr); 835 ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr); 836 } 837 if (baij->slvec1) { 838 ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr); 839 ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr); 840 ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr); 841 } 842 if (baij->sMvctx) {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);} 843 ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr); 844 ierr = PetscFree(baij->barray);CHKERRQ(ierr); 845 ierr = PetscFree(baij->hd);CHKERRQ(ierr); 846 #if defined(PETSC_USE_MAT_SINGLE) 847 ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr); 848 #endif 849 ierr = PetscFree(baij->in_loc);CHKERRQ(ierr); 850 ierr = PetscFree(baij->v_loc);CHKERRQ(ierr); 851 ierr = PetscFree(baij->rangebs);CHKERRQ(ierr); 852 ierr = PetscFree(baij);CHKERRQ(ierr); 853 854 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 855 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 856 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 857 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 858 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 859 PetscFunctionReturn(0); 860 } 861 862 #undef __FUNCT__ 863 #define __FUNCT__ "MatMult_MPISBAIJ" 864 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy) 865 { 866 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 867 PetscErrorCode ierr; 868 PetscInt nt,mbs=a->mbs,bs=A->rmap.bs; 869 PetscScalar *x,*from,zero=0.0; 870 871 PetscFunctionBegin; 872 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 873 if (nt != A->cmap.n) { 874 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 875 } 876 877 /* diagonal part */ 878 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 879 ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr); 880 881 /* subdiagonal part */ 882 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 883 884 /* copy x into the vec slvec0 */ 885 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 886 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 887 888 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 889 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 890 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 891 892 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 893 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 894 /* supperdiagonal part */ 895 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 896 PetscFunctionReturn(0); 897 } 898 899 #undef __FUNCT__ 900 #define __FUNCT__ "MatMult_MPISBAIJ_2comm" 901 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy) 902 { 903 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 904 PetscErrorCode ierr; 905 PetscInt nt; 906 907 PetscFunctionBegin; 908 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 909 if (nt != A->cmap.n) { 910 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 911 } 912 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 913 if (nt != A->rmap.N) { 914 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 915 } 916 917 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 918 /* do diagonal part */ 919 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 920 /* do supperdiagonal part */ 921 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 922 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 923 /* do subdiagonal part */ 924 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 925 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 926 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 927 928 PetscFunctionReturn(0); 929 } 930 931 #undef __FUNCT__ 932 #define __FUNCT__ "MatMultAdd_MPISBAIJ" 933 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 934 { 935 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 936 PetscErrorCode ierr; 937 PetscInt mbs=a->mbs,bs=A->rmap.bs; 938 PetscScalar *x,*from,zero=0.0; 939 940 PetscFunctionBegin; 941 /* 942 PetscSynchronizedPrintf(((PetscObject)A)->comm," MatMultAdd is called ...\n"); 943 PetscSynchronizedFlush(((PetscObject)A)->comm); 944 */ 945 /* diagonal part */ 946 ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr); 947 ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr); 948 949 /* subdiagonal part */ 950 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 951 952 /* copy x into the vec slvec0 */ 953 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 954 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 955 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 956 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 957 958 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 959 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 960 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 961 962 /* supperdiagonal part */ 963 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr); 964 965 PetscFunctionReturn(0); 966 } 967 968 #undef __FUNCT__ 969 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm" 970 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz) 971 { 972 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 973 PetscErrorCode ierr; 974 975 PetscFunctionBegin; 976 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 977 /* do diagonal part */ 978 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 979 /* do supperdiagonal part */ 980 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 981 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 982 983 /* do subdiagonal part */ 984 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 985 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 986 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 987 988 PetscFunctionReturn(0); 989 } 990 991 /* 992 This only works correctly for square matrices where the subblock A->A is the 993 diagonal block 994 */ 995 #undef __FUNCT__ 996 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ" 997 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v) 998 { 999 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1000 PetscErrorCode ierr; 1001 1002 PetscFunctionBegin; 1003 /* if (a->rmap.N != a->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */ 1004 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1005 PetscFunctionReturn(0); 1006 } 1007 1008 #undef __FUNCT__ 1009 #define __FUNCT__ "MatScale_MPISBAIJ" 1010 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa) 1011 { 1012 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1013 PetscErrorCode ierr; 1014 1015 PetscFunctionBegin; 1016 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1017 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1018 PetscFunctionReturn(0); 1019 } 1020 1021 #undef __FUNCT__ 1022 #define __FUNCT__ "MatGetRow_MPISBAIJ" 1023 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1024 { 1025 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 1026 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1027 PetscErrorCode ierr; 1028 PetscInt bs = matin->rmap.bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1029 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap.rstart,brend = matin->rmap.rend; 1030 PetscInt *cmap,*idx_p,cstart = mat->rstartbs; 1031 1032 PetscFunctionBegin; 1033 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1034 mat->getrowactive = PETSC_TRUE; 1035 1036 if (!mat->rowvalues && (idx || v)) { 1037 /* 1038 allocate enough space to hold information from the longest row. 1039 */ 1040 Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data; 1041 Mat_SeqBAIJ *Ba = (Mat_SeqBAIJ*)mat->B->data; 1042 PetscInt max = 1,mbs = mat->mbs,tmp; 1043 for (i=0; i<mbs; i++) { 1044 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */ 1045 if (max < tmp) { max = tmp; } 1046 } 1047 ierr = PetscMalloc(max*bs2*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1048 mat->rowindices = (PetscInt*)(mat->rowvalues + max*bs2); 1049 } 1050 1051 if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows") 1052 lrow = row - brstart; /* local row index */ 1053 1054 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1055 if (!v) {pvA = 0; pvB = 0;} 1056 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1057 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1058 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1059 nztot = nzA + nzB; 1060 1061 cmap = mat->garray; 1062 if (v || idx) { 1063 if (nztot) { 1064 /* Sort by increasing column numbers, assuming A and B already sorted */ 1065 PetscInt imark = -1; 1066 if (v) { 1067 *v = v_p = mat->rowvalues; 1068 for (i=0; i<nzB; i++) { 1069 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1070 else break; 1071 } 1072 imark = i; 1073 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1074 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1075 } 1076 if (idx) { 1077 *idx = idx_p = mat->rowindices; 1078 if (imark > -1) { 1079 for (i=0; i<imark; i++) { 1080 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1081 } 1082 } else { 1083 for (i=0; i<nzB; i++) { 1084 if (cmap[cworkB[i]/bs] < cstart) 1085 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1086 else break; 1087 } 1088 imark = i; 1089 } 1090 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1091 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1092 } 1093 } else { 1094 if (idx) *idx = 0; 1095 if (v) *v = 0; 1096 } 1097 } 1098 *nz = nztot; 1099 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1100 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1101 PetscFunctionReturn(0); 1102 } 1103 1104 #undef __FUNCT__ 1105 #define __FUNCT__ "MatRestoreRow_MPISBAIJ" 1106 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1107 { 1108 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1109 1110 PetscFunctionBegin; 1111 if (!baij->getrowactive) { 1112 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1113 } 1114 baij->getrowactive = PETSC_FALSE; 1115 PetscFunctionReturn(0); 1116 } 1117 1118 #undef __FUNCT__ 1119 #define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ" 1120 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A) 1121 { 1122 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1123 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1124 1125 PetscFunctionBegin; 1126 aA->getrow_utriangular = PETSC_TRUE; 1127 PetscFunctionReturn(0); 1128 } 1129 #undef __FUNCT__ 1130 #define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ" 1131 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A) 1132 { 1133 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1134 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1135 1136 PetscFunctionBegin; 1137 aA->getrow_utriangular = PETSC_FALSE; 1138 PetscFunctionReturn(0); 1139 } 1140 1141 #undef __FUNCT__ 1142 #define __FUNCT__ "MatRealPart_MPISBAIJ" 1143 PetscErrorCode MatRealPart_MPISBAIJ(Mat A) 1144 { 1145 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1146 PetscErrorCode ierr; 1147 1148 PetscFunctionBegin; 1149 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1150 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1151 PetscFunctionReturn(0); 1152 } 1153 1154 #undef __FUNCT__ 1155 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ" 1156 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A) 1157 { 1158 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1159 PetscErrorCode ierr; 1160 1161 PetscFunctionBegin; 1162 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1163 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1164 PetscFunctionReturn(0); 1165 } 1166 1167 #undef __FUNCT__ 1168 #define __FUNCT__ "MatZeroEntries_MPISBAIJ" 1169 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A) 1170 { 1171 Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data; 1172 PetscErrorCode ierr; 1173 1174 PetscFunctionBegin; 1175 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1176 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1177 PetscFunctionReturn(0); 1178 } 1179 1180 #undef __FUNCT__ 1181 #define __FUNCT__ "MatGetInfo_MPISBAIJ" 1182 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1183 { 1184 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data; 1185 Mat A = a->A,B = a->B; 1186 PetscErrorCode ierr; 1187 PetscReal isend[5],irecv[5]; 1188 1189 PetscFunctionBegin; 1190 info->block_size = (PetscReal)matin->rmap.bs; 1191 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1192 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1193 isend[3] = info->memory; isend[4] = info->mallocs; 1194 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1195 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1196 isend[3] += info->memory; isend[4] += info->mallocs; 1197 if (flag == MAT_LOCAL) { 1198 info->nz_used = isend[0]; 1199 info->nz_allocated = isend[1]; 1200 info->nz_unneeded = isend[2]; 1201 info->memory = isend[3]; 1202 info->mallocs = isend[4]; 1203 } else if (flag == MAT_GLOBAL_MAX) { 1204 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1205 info->nz_used = irecv[0]; 1206 info->nz_allocated = irecv[1]; 1207 info->nz_unneeded = irecv[2]; 1208 info->memory = irecv[3]; 1209 info->mallocs = irecv[4]; 1210 } else if (flag == MAT_GLOBAL_SUM) { 1211 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1212 info->nz_used = irecv[0]; 1213 info->nz_allocated = irecv[1]; 1214 info->nz_unneeded = irecv[2]; 1215 info->memory = irecv[3]; 1216 info->mallocs = irecv[4]; 1217 } else { 1218 SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1219 } 1220 info->rows_global = (PetscReal)A->rmap.N; 1221 info->columns_global = (PetscReal)A->cmap.N; 1222 info->rows_local = (PetscReal)A->rmap.N; 1223 info->columns_local = (PetscReal)A->cmap.N; 1224 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1225 info->fill_ratio_needed = 0; 1226 info->factor_mallocs = 0; 1227 PetscFunctionReturn(0); 1228 } 1229 1230 #undef __FUNCT__ 1231 #define __FUNCT__ "MatSetOption_MPISBAIJ" 1232 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscTruth flg) 1233 { 1234 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1235 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1236 PetscErrorCode ierr; 1237 1238 PetscFunctionBegin; 1239 switch (op) { 1240 case MAT_NEW_NONZERO_LOCATIONS: 1241 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1242 case MAT_KEEP_ZEROED_ROWS: 1243 case MAT_NEW_NONZERO_LOCATION_ERR: 1244 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1245 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1246 break; 1247 case MAT_ROW_ORIENTED: 1248 a->roworiented = flg; 1249 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1250 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1251 break; 1252 case MAT_NEW_DIAGONALS: 1253 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1254 break; 1255 case MAT_IGNORE_OFF_PROC_ENTRIES: 1256 a->donotstash = flg; 1257 break; 1258 case MAT_USE_HASH_TABLE: 1259 a->ht_flag = flg; 1260 break; 1261 case MAT_HERMITIAN: 1262 if (flg) SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric"); 1263 case MAT_SYMMETRIC: 1264 case MAT_STRUCTURALLY_SYMMETRIC: 1265 case MAT_SYMMETRY_ETERNAL: 1266 if (!flg) SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric"); 1267 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1268 break; 1269 case MAT_IGNORE_LOWER_TRIANGULAR: 1270 aA->ignore_ltriangular = flg; 1271 break; 1272 case MAT_ERROR_LOWER_TRIANGULAR: 1273 aA->ignore_ltriangular = flg; 1274 break; 1275 case MAT_GETROW_UPPERTRIANGULAR: 1276 aA->getrow_utriangular = flg; 1277 break; 1278 default: 1279 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 1280 } 1281 PetscFunctionReturn(0); 1282 } 1283 1284 #undef __FUNCT__ 1285 #define __FUNCT__ "MatTranspose_MPISBAIJ" 1286 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,Mat *B) 1287 { 1288 PetscErrorCode ierr; 1289 PetscFunctionBegin; 1290 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 1291 PetscFunctionReturn(0); 1292 } 1293 1294 #undef __FUNCT__ 1295 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ" 1296 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr) 1297 { 1298 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1299 Mat a=baij->A, b=baij->B; 1300 PetscErrorCode ierr; 1301 PetscInt nv,m,n; 1302 PetscTruth flg; 1303 1304 PetscFunctionBegin; 1305 if (ll != rr){ 1306 ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr); 1307 if (!flg) 1308 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n"); 1309 } 1310 if (!ll) PetscFunctionReturn(0); 1311 1312 ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr); 1313 if (m != n) SETERRQ2(PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n); 1314 1315 ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr); 1316 if (nv!=n) SETERRQ(PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size"); 1317 1318 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1319 1320 /* left diagonalscale the off-diagonal part */ 1321 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1322 1323 /* scale the diagonal part */ 1324 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1325 1326 /* right diagonalscale the off-diagonal part */ 1327 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1328 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1329 PetscFunctionReturn(0); 1330 } 1331 1332 #undef __FUNCT__ 1333 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ" 1334 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A) 1335 { 1336 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1337 PetscErrorCode ierr; 1338 1339 PetscFunctionBegin; 1340 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1341 PetscFunctionReturn(0); 1342 } 1343 1344 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *); 1345 1346 #undef __FUNCT__ 1347 #define __FUNCT__ "MatEqual_MPISBAIJ" 1348 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag) 1349 { 1350 Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data; 1351 Mat a,b,c,d; 1352 PetscTruth flg; 1353 PetscErrorCode ierr; 1354 1355 PetscFunctionBegin; 1356 a = matA->A; b = matA->B; 1357 c = matB->A; d = matB->B; 1358 1359 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1360 if (flg) { 1361 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1362 } 1363 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1364 PetscFunctionReturn(0); 1365 } 1366 1367 #undef __FUNCT__ 1368 #define __FUNCT__ "MatCopy_MPISBAIJ" 1369 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str) 1370 { 1371 PetscErrorCode ierr; 1372 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1373 Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data; 1374 1375 PetscFunctionBegin; 1376 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1377 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1378 ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr); 1379 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1380 ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr); 1381 } else { 1382 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1383 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1384 } 1385 PetscFunctionReturn(0); 1386 } 1387 1388 #undef __FUNCT__ 1389 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ" 1390 PetscErrorCode MatSetUpPreallocation_MPISBAIJ(Mat A) 1391 { 1392 PetscErrorCode ierr; 1393 1394 PetscFunctionBegin; 1395 ierr = MatMPISBAIJSetPreallocation(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1396 PetscFunctionReturn(0); 1397 } 1398 1399 #include "petscblaslapack.h" 1400 #undef __FUNCT__ 1401 #define __FUNCT__ "MatAXPY_MPISBAIJ" 1402 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1403 { 1404 PetscErrorCode ierr; 1405 Mat_MPISBAIJ *xx=(Mat_MPISBAIJ *)X->data,*yy=(Mat_MPISBAIJ *)Y->data; 1406 PetscBLASInt bnz,one=1; 1407 Mat_SeqSBAIJ *xa,*ya; 1408 Mat_SeqBAIJ *xb,*yb; 1409 1410 PetscFunctionBegin; 1411 if (str == SAME_NONZERO_PATTERN) { 1412 PetscScalar alpha = a; 1413 xa = (Mat_SeqSBAIJ *)xx->A->data; 1414 ya = (Mat_SeqSBAIJ *)yy->A->data; 1415 bnz = (PetscBLASInt)xa->nz; 1416 BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one); 1417 xb = (Mat_SeqBAIJ *)xx->B->data; 1418 yb = (Mat_SeqBAIJ *)yy->B->data; 1419 bnz = (PetscBLASInt)xb->nz; 1420 BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one); 1421 } else { 1422 ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr); 1423 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1424 ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr); 1425 } 1426 PetscFunctionReturn(0); 1427 } 1428 1429 #undef __FUNCT__ 1430 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ" 1431 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[]) 1432 { 1433 PetscErrorCode ierr; 1434 PetscInt i; 1435 PetscTruth flg; 1436 1437 PetscFunctionBegin; 1438 for (i=0; i<n; i++) { 1439 ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr); 1440 if (!flg) { 1441 SETERRQ(PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices"); 1442 } 1443 } 1444 ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr); 1445 PetscFunctionReturn(0); 1446 } 1447 1448 1449 /* -------------------------------------------------------------------*/ 1450 static struct _MatOps MatOps_Values = { 1451 MatSetValues_MPISBAIJ, 1452 MatGetRow_MPISBAIJ, 1453 MatRestoreRow_MPISBAIJ, 1454 MatMult_MPISBAIJ, 1455 /* 4*/ MatMultAdd_MPISBAIJ, 1456 MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */ 1457 MatMultAdd_MPISBAIJ, 1458 0, 1459 0, 1460 0, 1461 /*10*/ 0, 1462 0, 1463 0, 1464 MatRelax_MPISBAIJ, 1465 MatTranspose_MPISBAIJ, 1466 /*15*/ MatGetInfo_MPISBAIJ, 1467 MatEqual_MPISBAIJ, 1468 MatGetDiagonal_MPISBAIJ, 1469 MatDiagonalScale_MPISBAIJ, 1470 MatNorm_MPISBAIJ, 1471 /*20*/ MatAssemblyBegin_MPISBAIJ, 1472 MatAssemblyEnd_MPISBAIJ, 1473 0, 1474 MatSetOption_MPISBAIJ, 1475 MatZeroEntries_MPISBAIJ, 1476 /*25*/ 0, 1477 0, 1478 0, 1479 0, 1480 0, 1481 /*30*/ MatSetUpPreallocation_MPISBAIJ, 1482 0, 1483 0, 1484 0, 1485 0, 1486 /*35*/ MatDuplicate_MPISBAIJ, 1487 0, 1488 0, 1489 0, 1490 0, 1491 /*40*/ MatAXPY_MPISBAIJ, 1492 MatGetSubMatrices_MPISBAIJ, 1493 MatIncreaseOverlap_MPISBAIJ, 1494 MatGetValues_MPISBAIJ, 1495 MatCopy_MPISBAIJ, 1496 /*45*/ 0, 1497 MatScale_MPISBAIJ, 1498 0, 1499 0, 1500 0, 1501 /*50*/ 0, 1502 0, 1503 0, 1504 0, 1505 0, 1506 /*55*/ 0, 1507 0, 1508 MatSetUnfactored_MPISBAIJ, 1509 0, 1510 MatSetValuesBlocked_MPISBAIJ, 1511 /*60*/ 0, 1512 0, 1513 0, 1514 0, 1515 0, 1516 /*65*/ 0, 1517 0, 1518 0, 1519 0, 1520 0, 1521 /*70*/ MatGetRowMaxAbs_MPISBAIJ, 1522 0, 1523 0, 1524 0, 1525 0, 1526 /*75*/ 0, 1527 0, 1528 0, 1529 0, 1530 0, 1531 /*80*/ 0, 1532 0, 1533 0, 1534 0, 1535 MatLoad_MPISBAIJ, 1536 /*85*/ 0, 1537 0, 1538 0, 1539 0, 1540 0, 1541 /*90*/ 0, 1542 0, 1543 0, 1544 0, 1545 0, 1546 /*95*/ 0, 1547 0, 1548 0, 1549 0, 1550 0, 1551 /*100*/0, 1552 0, 1553 0, 1554 0, 1555 0, 1556 /*105*/0, 1557 MatRealPart_MPISBAIJ, 1558 MatImaginaryPart_MPISBAIJ, 1559 MatGetRowUpperTriangular_MPISBAIJ, 1560 MatRestoreRowUpperTriangular_MPISBAIJ 1561 }; 1562 1563 1564 EXTERN_C_BEGIN 1565 #undef __FUNCT__ 1566 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ" 1567 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 1568 { 1569 PetscFunctionBegin; 1570 *a = ((Mat_MPISBAIJ *)A->data)->A; 1571 *iscopy = PETSC_FALSE; 1572 PetscFunctionReturn(0); 1573 } 1574 EXTERN_C_END 1575 1576 EXTERN_C_BEGIN 1577 #undef __FUNCT__ 1578 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ" 1579 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz) 1580 { 1581 Mat_MPISBAIJ *b; 1582 PetscErrorCode ierr; 1583 PetscInt i,mbs,Mbs; 1584 1585 PetscFunctionBegin; 1586 ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPISBAIJ matrix","Mat");CHKERRQ(ierr); 1587 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 1588 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1589 1590 if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive"); 1591 if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3; 1592 if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1; 1593 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 1594 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 1595 1596 B->rmap.bs = B->cmap.bs = bs; 1597 ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr); 1598 ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr); 1599 1600 if (d_nnz) { 1601 for (i=0; i<B->rmap.n/bs; i++) { 1602 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 1603 } 1604 } 1605 if (o_nnz) { 1606 for (i=0; i<B->rmap.n/bs; i++) { 1607 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 1608 } 1609 } 1610 B->preallocated = PETSC_TRUE; 1611 1612 b = (Mat_MPISBAIJ*)B->data; 1613 mbs = B->rmap.n/bs; 1614 Mbs = B->rmap.N/bs; 1615 if (mbs*bs != B->rmap.n) { 1616 SETERRQ2(PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap.N,bs); 1617 } 1618 1619 B->rmap.bs = bs; 1620 b->bs2 = bs*bs; 1621 b->mbs = mbs; 1622 b->nbs = mbs; 1623 b->Mbs = Mbs; 1624 b->Nbs = Mbs; 1625 1626 for (i=0; i<=b->size; i++) { 1627 b->rangebs[i] = B->rmap.range[i]/bs; 1628 } 1629 b->rstartbs = B->rmap.rstart/bs; 1630 b->rendbs = B->rmap.rend/bs; 1631 1632 b->cstartbs = B->cmap.rstart/bs; 1633 b->cendbs = B->cmap.rend/bs; 1634 1635 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 1636 ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr); 1637 ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr); 1638 ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 1639 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 1640 1641 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 1642 ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr); 1643 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 1644 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 1645 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 1646 1647 /* build cache for off array entries formed */ 1648 ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr); 1649 1650 PetscFunctionReturn(0); 1651 } 1652 EXTERN_C_END 1653 1654 /*MC 1655 MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices, 1656 based on block compressed sparse row format. Only the upper triangular portion of the matrix is stored. 1657 1658 Options Database Keys: 1659 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions() 1660 1661 Level: beginner 1662 1663 .seealso: MatCreateMPISBAIJ 1664 M*/ 1665 1666 EXTERN_C_BEGIN 1667 #undef __FUNCT__ 1668 #define __FUNCT__ "MatCreate_MPISBAIJ" 1669 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPISBAIJ(Mat B) 1670 { 1671 Mat_MPISBAIJ *b; 1672 PetscErrorCode ierr; 1673 PetscTruth flg; 1674 1675 PetscFunctionBegin; 1676 1677 ierr = PetscNewLog(B,Mat_MPISBAIJ,&b);CHKERRQ(ierr); 1678 B->data = (void*)b; 1679 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1680 1681 B->ops->destroy = MatDestroy_MPISBAIJ; 1682 B->ops->view = MatView_MPISBAIJ; 1683 B->mapping = 0; 1684 B->factor = 0; 1685 B->assembled = PETSC_FALSE; 1686 1687 B->insertmode = NOT_SET_VALUES; 1688 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 1689 ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr); 1690 1691 /* build local table of row and column ownerships */ 1692 ierr = PetscMalloc((b->size+2)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr); 1693 1694 /* build cache for off array entries formed */ 1695 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 1696 b->donotstash = PETSC_FALSE; 1697 b->colmap = PETSC_NULL; 1698 b->garray = PETSC_NULL; 1699 b->roworiented = PETSC_TRUE; 1700 1701 #if defined(PETSC_USE_MAT_SINGLE) 1702 /* stuff for MatSetValues_XXX in single precision */ 1703 b->setvalueslen = 0; 1704 b->setvaluescopy = PETSC_NULL; 1705 #endif 1706 1707 /* stuff used in block assembly */ 1708 b->barray = 0; 1709 1710 /* stuff used for matrix vector multiply */ 1711 b->lvec = 0; 1712 b->Mvctx = 0; 1713 b->slvec0 = 0; 1714 b->slvec0b = 0; 1715 b->slvec1 = 0; 1716 b->slvec1a = 0; 1717 b->slvec1b = 0; 1718 b->sMvctx = 0; 1719 1720 /* stuff for MatGetRow() */ 1721 b->rowindices = 0; 1722 b->rowvalues = 0; 1723 b->getrowactive = PETSC_FALSE; 1724 1725 /* hash table stuff */ 1726 b->ht = 0; 1727 b->hd = 0; 1728 b->ht_size = 0; 1729 b->ht_flag = PETSC_FALSE; 1730 b->ht_fact = 0; 1731 b->ht_total_ct = 0; 1732 b->ht_insert_ct = 0; 1733 1734 b->in_loc = 0; 1735 b->v_loc = 0; 1736 b->n_loc = 0; 1737 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr); 1738 ierr = PetscOptionsTruth("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 1739 if (flg) { 1740 PetscReal fact = 1.39; 1741 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 1742 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr); 1743 if (fact <= 1.0) fact = 1.39; 1744 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 1745 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 1746 } 1747 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1748 1749 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1750 "MatStoreValues_MPISBAIJ", 1751 MatStoreValues_MPISBAIJ);CHKERRQ(ierr); 1752 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1753 "MatRetrieveValues_MPISBAIJ", 1754 MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr); 1755 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 1756 "MatGetDiagonalBlock_MPISBAIJ", 1757 MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr); 1758 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C", 1759 "MatMPISBAIJSetPreallocation_MPISBAIJ", 1760 MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr); 1761 B->symmetric = PETSC_TRUE; 1762 B->structurally_symmetric = PETSC_TRUE; 1763 B->symmetric_set = PETSC_TRUE; 1764 B->structurally_symmetric_set = PETSC_TRUE; 1765 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr); 1766 PetscFunctionReturn(0); 1767 } 1768 EXTERN_C_END 1769 1770 /*MC 1771 MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices. 1772 1773 This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator, 1774 and MATMPISBAIJ otherwise. 1775 1776 Options Database Keys: 1777 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions() 1778 1779 Level: beginner 1780 1781 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ 1782 M*/ 1783 1784 EXTERN_C_BEGIN 1785 #undef __FUNCT__ 1786 #define __FUNCT__ "MatCreate_SBAIJ" 1787 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SBAIJ(Mat A) 1788 { 1789 PetscErrorCode ierr; 1790 PetscMPIInt size; 1791 1792 PetscFunctionBegin; 1793 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 1794 if (size == 1) { 1795 ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr); 1796 } else { 1797 ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 1798 } 1799 PetscFunctionReturn(0); 1800 } 1801 EXTERN_C_END 1802 1803 #undef __FUNCT__ 1804 #define __FUNCT__ "MatMPISBAIJSetPreallocation" 1805 /*@C 1806 MatMPISBAIJSetPreallocation - For good matrix assembly performance 1807 the user should preallocate the matrix storage by setting the parameters 1808 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1809 performance can be increased by more than a factor of 50. 1810 1811 Collective on Mat 1812 1813 Input Parameters: 1814 + A - the matrix 1815 . bs - size of blockk 1816 . d_nz - number of block nonzeros per block row in diagonal portion of local 1817 submatrix (same for all local rows) 1818 . d_nnz - array containing the number of block nonzeros in the various block rows 1819 in the upper triangular and diagonal part of the in diagonal portion of the local 1820 (possibly different for each block row) or PETSC_NULL. You must leave room 1821 for the diagonal entry even if it is zero. 1822 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1823 submatrix (same for all local rows). 1824 - o_nnz - array containing the number of nonzeros in the various block rows of the 1825 off-diagonal portion of the local submatrix (possibly different for 1826 each block row) or PETSC_NULL. 1827 1828 1829 Options Database Keys: 1830 . -mat_no_unroll - uses code that does not unroll the loops in the 1831 block calculations (much slower) 1832 . -mat_block_size - size of the blocks to use 1833 1834 Notes: 1835 1836 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1837 than it must be used on all processors that share the object for that argument. 1838 1839 If the *_nnz parameter is given then the *_nz parameter is ignored 1840 1841 Storage Information: 1842 For a square global matrix we define each processor's diagonal portion 1843 to be its local rows and the corresponding columns (a square submatrix); 1844 each processor's off-diagonal portion encompasses the remainder of the 1845 local matrix (a rectangular submatrix). 1846 1847 The user can specify preallocated storage for the diagonal part of 1848 the local submatrix with either d_nz or d_nnz (not both). Set 1849 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1850 memory allocation. Likewise, specify preallocated storage for the 1851 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1852 1853 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1854 the figure below we depict these three local rows and all columns (0-11). 1855 1856 .vb 1857 0 1 2 3 4 5 6 7 8 9 10 11 1858 ------------------- 1859 row 3 | o o o d d d o o o o o o 1860 row 4 | o o o d d d o o o o o o 1861 row 5 | o o o d d d o o o o o o 1862 ------------------- 1863 .ve 1864 1865 Thus, any entries in the d locations are stored in the d (diagonal) 1866 submatrix, and any entries in the o locations are stored in the 1867 o (off-diagonal) submatrix. Note that the d matrix is stored in 1868 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 1869 1870 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 1871 plus the diagonal part of the d matrix, 1872 and o_nz should indicate the number of block nonzeros per row in the o matrix. 1873 In general, for PDE problems in which most nonzeros are near the diagonal, 1874 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1875 or you will get TERRIBLE performance; see the users' manual chapter on 1876 matrices. 1877 1878 Level: intermediate 1879 1880 .keywords: matrix, block, aij, compressed row, sparse, parallel 1881 1882 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1883 @*/ 1884 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 1885 { 1886 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 1887 1888 PetscFunctionBegin; 1889 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 1890 if (f) { 1891 ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 1892 } 1893 PetscFunctionReturn(0); 1894 } 1895 1896 #undef __FUNCT__ 1897 #define __FUNCT__ "MatCreateMPISBAIJ" 1898 /*@C 1899 MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format 1900 (block compressed row). For good matrix assembly performance 1901 the user should preallocate the matrix storage by setting the parameters 1902 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1903 performance can be increased by more than a factor of 50. 1904 1905 Collective on MPI_Comm 1906 1907 Input Parameters: 1908 + comm - MPI communicator 1909 . bs - size of blockk 1910 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1911 This value should be the same as the local size used in creating the 1912 y vector for the matrix-vector product y = Ax. 1913 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1914 This value should be the same as the local size used in creating the 1915 x vector for the matrix-vector product y = Ax. 1916 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 1917 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 1918 . d_nz - number of block nonzeros per block row in diagonal portion of local 1919 submatrix (same for all local rows) 1920 . d_nnz - array containing the number of block nonzeros in the various block rows 1921 in the upper triangular portion of the in diagonal portion of the local 1922 (possibly different for each block block row) or PETSC_NULL. 1923 You must leave room for the diagonal entry even if it is zero. 1924 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1925 submatrix (same for all local rows). 1926 - o_nnz - array containing the number of nonzeros in the various block rows of the 1927 off-diagonal portion of the local submatrix (possibly different for 1928 each block row) or PETSC_NULL. 1929 1930 Output Parameter: 1931 . A - the matrix 1932 1933 Options Database Keys: 1934 . -mat_no_unroll - uses code that does not unroll the loops in the 1935 block calculations (much slower) 1936 . -mat_block_size - size of the blocks to use 1937 . -mat_mpi - use the parallel matrix data structures even on one processor 1938 (defaults to using SeqBAIJ format on one processor) 1939 1940 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 1941 MatXXXXSetPreallocation() paradgm instead of this routine directly. This is definitely 1942 true if you plan to use the external direct solvers such as SuperLU, MUMPS or Spooles. 1943 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 1944 1945 Notes: 1946 The number of rows and columns must be divisible by blocksize. 1947 This matrix type does not support complex Hermitian operation. 1948 1949 The user MUST specify either the local or global matrix dimensions 1950 (possibly both). 1951 1952 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1953 than it must be used on all processors that share the object for that argument. 1954 1955 If the *_nnz parameter is given then the *_nz parameter is ignored 1956 1957 Storage Information: 1958 For a square global matrix we define each processor's diagonal portion 1959 to be its local rows and the corresponding columns (a square submatrix); 1960 each processor's off-diagonal portion encompasses the remainder of the 1961 local matrix (a rectangular submatrix). 1962 1963 The user can specify preallocated storage for the diagonal part of 1964 the local submatrix with either d_nz or d_nnz (not both). Set 1965 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1966 memory allocation. Likewise, specify preallocated storage for the 1967 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1968 1969 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1970 the figure below we depict these three local rows and all columns (0-11). 1971 1972 .vb 1973 0 1 2 3 4 5 6 7 8 9 10 11 1974 ------------------- 1975 row 3 | o o o d d d o o o o o o 1976 row 4 | o o o d d d o o o o o o 1977 row 5 | o o o d d d o o o o o o 1978 ------------------- 1979 .ve 1980 1981 Thus, any entries in the d locations are stored in the d (diagonal) 1982 submatrix, and any entries in the o locations are stored in the 1983 o (off-diagonal) submatrix. Note that the d matrix is stored in 1984 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 1985 1986 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 1987 plus the diagonal part of the d matrix, 1988 and o_nz should indicate the number of block nonzeros per row in the o matrix. 1989 In general, for PDE problems in which most nonzeros are near the diagonal, 1990 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1991 or you will get TERRIBLE performance; see the users' manual chapter on 1992 matrices. 1993 1994 Level: intermediate 1995 1996 .keywords: matrix, block, aij, compressed row, sparse, parallel 1997 1998 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1999 @*/ 2000 2001 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPISBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2002 { 2003 PetscErrorCode ierr; 2004 PetscMPIInt size; 2005 2006 PetscFunctionBegin; 2007 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2008 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2009 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2010 if (size > 1) { 2011 ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr); 2012 ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2013 } else { 2014 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 2015 ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2016 } 2017 PetscFunctionReturn(0); 2018 } 2019 2020 2021 #undef __FUNCT__ 2022 #define __FUNCT__ "MatDuplicate_MPISBAIJ" 2023 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2024 { 2025 Mat mat; 2026 Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data; 2027 PetscErrorCode ierr; 2028 PetscInt len=0,nt,bs=matin->rmap.bs,mbs=oldmat->mbs; 2029 PetscScalar *array; 2030 2031 PetscFunctionBegin; 2032 *newmat = 0; 2033 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 2034 ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr); 2035 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2036 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2037 ierr = PetscMapCopy(((PetscObject)matin)->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr); 2038 ierr = PetscMapCopy(((PetscObject)matin)->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr); 2039 2040 mat->factor = matin->factor; 2041 mat->preallocated = PETSC_TRUE; 2042 mat->assembled = PETSC_TRUE; 2043 mat->insertmode = NOT_SET_VALUES; 2044 2045 a = (Mat_MPISBAIJ*)mat->data; 2046 a->bs2 = oldmat->bs2; 2047 a->mbs = oldmat->mbs; 2048 a->nbs = oldmat->nbs; 2049 a->Mbs = oldmat->Mbs; 2050 a->Nbs = oldmat->Nbs; 2051 2052 2053 a->size = oldmat->size; 2054 a->rank = oldmat->rank; 2055 a->donotstash = oldmat->donotstash; 2056 a->roworiented = oldmat->roworiented; 2057 a->rowindices = 0; 2058 a->rowvalues = 0; 2059 a->getrowactive = PETSC_FALSE; 2060 a->barray = 0; 2061 a->rstartbs = oldmat->rstartbs; 2062 a->rendbs = oldmat->rendbs; 2063 a->cstartbs = oldmat->cstartbs; 2064 a->cendbs = oldmat->cendbs; 2065 2066 /* hash table stuff */ 2067 a->ht = 0; 2068 a->hd = 0; 2069 a->ht_size = 0; 2070 a->ht_flag = oldmat->ht_flag; 2071 a->ht_fact = oldmat->ht_fact; 2072 a->ht_total_ct = 0; 2073 a->ht_insert_ct = 0; 2074 2075 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr); 2076 ierr = MatStashCreate_Private(((PetscObject)matin)->comm,1,&mat->stash);CHKERRQ(ierr); 2077 ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap.bs,&mat->bstash);CHKERRQ(ierr); 2078 if (oldmat->colmap) { 2079 #if defined (PETSC_USE_CTABLE) 2080 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2081 #else 2082 ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2083 ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2084 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2085 #endif 2086 } else a->colmap = 0; 2087 2088 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2089 ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2090 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2091 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 2092 } else a->garray = 0; 2093 2094 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2095 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2096 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2097 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2098 2099 ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr); 2100 ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr); 2101 ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr); 2102 ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr); 2103 2104 ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr); 2105 ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr); 2106 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr); 2107 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr); 2108 ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr); 2109 ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr); 2110 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr); 2111 ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr); 2112 ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr); 2113 ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr); 2114 ierr = PetscLogObjectParent(mat,a->slvec0b);CHKERRQ(ierr); 2115 ierr = PetscLogObjectParent(mat,a->slvec1a);CHKERRQ(ierr); 2116 ierr = PetscLogObjectParent(mat,a->slvec1b);CHKERRQ(ierr); 2117 2118 /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */ 2119 ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr); 2120 a->sMvctx = oldmat->sMvctx; 2121 ierr = PetscLogObjectParent(mat,a->sMvctx);CHKERRQ(ierr); 2122 2123 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2124 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2125 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2126 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2127 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2128 *newmat = mat; 2129 PetscFunctionReturn(0); 2130 } 2131 2132 #include "petscsys.h" 2133 2134 #undef __FUNCT__ 2135 #define __FUNCT__ "MatLoad_MPISBAIJ" 2136 PetscErrorCode MatLoad_MPISBAIJ(PetscViewer viewer, MatType type,Mat *newmat) 2137 { 2138 Mat A; 2139 PetscErrorCode ierr; 2140 PetscInt i,nz,j,rstart,rend; 2141 PetscScalar *vals,*buf; 2142 MPI_Comm comm = ((PetscObject)viewer)->comm; 2143 MPI_Status status; 2144 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,*locrowlens; 2145 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2146 PetscInt *procsnz = 0,jj,*mycols,*ibuf; 2147 PetscInt bs=1,Mbs,mbs,extra_rows; 2148 PetscInt *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2149 PetscInt dcount,kmax,k,nzcount,tmp; 2150 int fd; 2151 2152 PetscFunctionBegin; 2153 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr); 2154 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 2155 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2156 2157 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2158 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2159 if (!rank) { 2160 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2161 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2162 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2163 if (header[3] < 0) { 2164 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ"); 2165 } 2166 } 2167 2168 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2169 M = header[1]; N = header[2]; 2170 2171 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2172 2173 /* 2174 This code adds extra rows to make sure the number of rows is 2175 divisible by the blocksize 2176 */ 2177 Mbs = M/bs; 2178 extra_rows = bs - M + bs*(Mbs); 2179 if (extra_rows == bs) extra_rows = 0; 2180 else Mbs++; 2181 if (extra_rows &&!rank) { 2182 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 2183 } 2184 2185 /* determine ownership of all rows */ 2186 mbs = Mbs/size + ((Mbs % size) > rank); 2187 m = mbs*bs; 2188 ierr = PetscMalloc(2*(size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr); 2189 browners = rowners + size + 1; 2190 ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2191 rowners[0] = 0; 2192 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2193 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2194 rstart = rowners[rank]; 2195 rend = rowners[rank+1]; 2196 2197 /* distribute row lengths to all processors */ 2198 ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscMPIInt),&locrowlens);CHKERRQ(ierr); 2199 if (!rank) { 2200 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2201 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2202 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2203 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr); 2204 for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2205 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2206 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2207 } else { 2208 ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2209 } 2210 2211 if (!rank) { /* procs[0] */ 2212 /* calculate the number of nonzeros on each processor */ 2213 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2214 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2215 for (i=0; i<size; i++) { 2216 for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 2217 procsnz[i] += rowlengths[j]; 2218 } 2219 } 2220 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2221 2222 /* determine max buffer needed and allocate it */ 2223 maxnz = 0; 2224 for (i=0; i<size; i++) { 2225 maxnz = PetscMax(maxnz,procsnz[i]); 2226 } 2227 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2228 2229 /* read in my part of the matrix column indices */ 2230 nz = procsnz[0]; 2231 ierr = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 2232 mycols = ibuf; 2233 if (size == 1) nz -= extra_rows; 2234 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2235 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2236 2237 /* read in every ones (except the last) and ship off */ 2238 for (i=1; i<size-1; i++) { 2239 nz = procsnz[i]; 2240 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2241 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2242 } 2243 /* read in the stuff for the last proc */ 2244 if (size != 1) { 2245 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2246 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2247 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2248 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 2249 } 2250 ierr = PetscFree(cols);CHKERRQ(ierr); 2251 } else { /* procs[i], i>0 */ 2252 /* determine buffer space needed for message */ 2253 nz = 0; 2254 for (i=0; i<m; i++) { 2255 nz += locrowlens[i]; 2256 } 2257 ierr = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 2258 mycols = ibuf; 2259 /* receive message of column indices*/ 2260 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2261 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2262 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2263 } 2264 2265 /* loop over local rows, determining number of off diagonal entries */ 2266 ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 2267 odlens = dlens + (rend-rstart); 2268 ierr = PetscMalloc(3*Mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr); 2269 ierr = PetscMemzero(mask,3*Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2270 masked1 = mask + Mbs; 2271 masked2 = masked1 + Mbs; 2272 rowcount = 0; nzcount = 0; 2273 for (i=0; i<mbs; i++) { 2274 dcount = 0; 2275 odcount = 0; 2276 for (j=0; j<bs; j++) { 2277 kmax = locrowlens[rowcount]; 2278 for (k=0; k<kmax; k++) { 2279 tmp = mycols[nzcount++]/bs; /* block col. index */ 2280 if (!mask[tmp]) { 2281 mask[tmp] = 1; 2282 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */ 2283 else masked1[dcount++] = tmp; /* entry in diag portion */ 2284 } 2285 } 2286 rowcount++; 2287 } 2288 2289 dlens[i] = dcount; /* d_nzz[i] */ 2290 odlens[i] = odcount; /* o_nzz[i] */ 2291 2292 /* zero out the mask elements we set */ 2293 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2294 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2295 } 2296 2297 /* create our matrix */ 2298 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 2299 ierr = MatSetSizes(A,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2300 ierr = MatSetType(A,type);CHKERRQ(ierr); 2301 ierr = MatMPISBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr); 2302 2303 if (!rank) { 2304 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2305 /* read in my part of the matrix numerical values */ 2306 nz = procsnz[0]; 2307 vals = buf; 2308 mycols = ibuf; 2309 if (size == 1) nz -= extra_rows; 2310 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2311 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2312 2313 /* insert into matrix */ 2314 jj = rstart*bs; 2315 for (i=0; i<m; i++) { 2316 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2317 mycols += locrowlens[i]; 2318 vals += locrowlens[i]; 2319 jj++; 2320 } 2321 2322 /* read in other processors (except the last one) and ship out */ 2323 for (i=1; i<size-1; i++) { 2324 nz = procsnz[i]; 2325 vals = buf; 2326 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2327 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr); 2328 } 2329 /* the last proc */ 2330 if (size != 1){ 2331 nz = procsnz[i] - extra_rows; 2332 vals = buf; 2333 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2334 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2335 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)A)->tag,comm);CHKERRQ(ierr); 2336 } 2337 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2338 2339 } else { 2340 /* receive numeric values */ 2341 ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2342 2343 /* receive message of values*/ 2344 vals = buf; 2345 mycols = ibuf; 2346 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr); 2347 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2348 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2349 2350 /* insert into matrix */ 2351 jj = rstart*bs; 2352 for (i=0; i<m; i++) { 2353 ierr = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2354 mycols += locrowlens[i]; 2355 vals += locrowlens[i]; 2356 jj++; 2357 } 2358 } 2359 2360 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2361 ierr = PetscFree(buf);CHKERRQ(ierr); 2362 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2363 ierr = PetscFree(rowners);CHKERRQ(ierr); 2364 ierr = PetscFree(dlens);CHKERRQ(ierr); 2365 ierr = PetscFree(mask);CHKERRQ(ierr); 2366 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2367 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2368 *newmat = A; 2369 PetscFunctionReturn(0); 2370 } 2371 2372 #undef __FUNCT__ 2373 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor" 2374 /*XXXXX@ 2375 MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2376 2377 Input Parameters: 2378 . mat - the matrix 2379 . fact - factor 2380 2381 Collective on Mat 2382 2383 Level: advanced 2384 2385 Notes: 2386 This can also be set by the command line option: -mat_use_hash_table fact 2387 2388 .keywords: matrix, hashtable, factor, HT 2389 2390 .seealso: MatSetOption() 2391 @XXXXX*/ 2392 2393 2394 #undef __FUNCT__ 2395 #define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ" 2396 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[]) 2397 { 2398 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 2399 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(a->B)->data; 2400 PetscReal atmp; 2401 PetscReal *work,*svalues,*rvalues; 2402 PetscErrorCode ierr; 2403 PetscInt i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol; 2404 PetscMPIInt rank,size; 2405 PetscInt *rowners_bs,dest,count,source; 2406 PetscScalar *va; 2407 MatScalar *ba; 2408 MPI_Status stat; 2409 2410 PetscFunctionBegin; 2411 if (idx) SETERRQ(PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov"); 2412 ierr = MatGetRowMaxAbs(a->A,v,PETSC_NULL);CHKERRQ(ierr); 2413 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2414 2415 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 2416 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 2417 2418 bs = A->rmap.bs; 2419 mbs = a->mbs; 2420 Mbs = a->Mbs; 2421 ba = b->a; 2422 bi = b->i; 2423 bj = b->j; 2424 2425 /* find ownerships */ 2426 rowners_bs = A->rmap.range; 2427 2428 /* each proc creates an array to be distributed */ 2429 ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr); 2430 ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr); 2431 2432 /* row_max for B */ 2433 if (rank != size-1){ 2434 for (i=0; i<mbs; i++) { 2435 ncols = bi[1] - bi[0]; bi++; 2436 brow = bs*i; 2437 for (j=0; j<ncols; j++){ 2438 bcol = bs*(*bj); 2439 for (kcol=0; kcol<bs; kcol++){ 2440 col = bcol + kcol; /* local col index */ 2441 col += rowners_bs[rank+1]; /* global col index */ 2442 for (krow=0; krow<bs; krow++){ 2443 atmp = PetscAbsScalar(*ba); ba++; 2444 row = brow + krow; /* local row index */ 2445 if (PetscRealPart(va[row]) < atmp) va[row] = atmp; 2446 if (work[col] < atmp) work[col] = atmp; 2447 } 2448 } 2449 bj++; 2450 } 2451 } 2452 2453 /* send values to its owners */ 2454 for (dest=rank+1; dest<size; dest++){ 2455 svalues = work + rowners_bs[dest]; 2456 count = rowners_bs[dest+1]-rowners_bs[dest]; 2457 ierr = MPI_Send(svalues,count,MPIU_REAL,dest,rank,((PetscObject)A)->comm);CHKERRQ(ierr); 2458 } 2459 } 2460 2461 /* receive values */ 2462 if (rank){ 2463 rvalues = work; 2464 count = rowners_bs[rank+1]-rowners_bs[rank]; 2465 for (source=0; source<rank; source++){ 2466 ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,((PetscObject)A)->comm,&stat);CHKERRQ(ierr); 2467 /* process values */ 2468 for (i=0; i<count; i++){ 2469 if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i]; 2470 } 2471 } 2472 } 2473 2474 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2475 ierr = PetscFree(work);CHKERRQ(ierr); 2476 PetscFunctionReturn(0); 2477 } 2478 2479 #undef __FUNCT__ 2480 #define __FUNCT__ "MatRelax_MPISBAIJ" 2481 PetscErrorCode MatRelax_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2482 { 2483 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2484 PetscErrorCode ierr; 2485 PetscInt mbs=mat->mbs,bs=matin->rmap.bs; 2486 PetscScalar *x,*b,*ptr,zero=0.0; 2487 Vec bb1; 2488 2489 PetscFunctionBegin; 2490 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 2491 if (bs > 1) 2492 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2493 2494 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2495 if ( flag & SOR_ZERO_INITIAL_GUESS ) { 2496 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2497 its--; 2498 } 2499 2500 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2501 while (its--){ 2502 2503 /* lower triangular part: slvec0b = - B^T*xx */ 2504 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2505 2506 /* copy xx into slvec0a */ 2507 ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2508 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2509 ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2510 ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2511 2512 ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr); 2513 2514 /* copy bb into slvec1a */ 2515 ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2516 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 2517 ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2518 ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2519 2520 /* set slvec1b = 0 */ 2521 ierr = VecSet(mat->slvec1b,zero);CHKERRQ(ierr); 2522 2523 ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2524 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2525 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 2526 ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2527 2528 /* upper triangular part: bb1 = bb1 - B*x */ 2529 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr); 2530 2531 /* local diagonal sweep */ 2532 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2533 } 2534 ierr = VecDestroy(bb1);CHKERRQ(ierr); 2535 } else { 2536 SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2537 } 2538 PetscFunctionReturn(0); 2539 } 2540 2541 #undef __FUNCT__ 2542 #define __FUNCT__ "MatRelax_MPISBAIJ_2comm" 2543 PetscErrorCode MatRelax_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2544 { 2545 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2546 PetscErrorCode ierr; 2547 Vec lvec1,bb1; 2548 2549 PetscFunctionBegin; 2550 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 2551 if (matin->rmap.bs > 1) 2552 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2553 2554 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2555 if ( flag & SOR_ZERO_INITIAL_GUESS ) { 2556 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2557 its--; 2558 } 2559 2560 ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr); 2561 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2562 while (its--){ 2563 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2564 2565 /* lower diagonal part: bb1 = bb - B^T*xx */ 2566 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr); 2567 ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr); 2568 2569 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2570 ierr = VecCopy(bb,bb1);CHKERRQ(ierr); 2571 ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2572 2573 /* upper diagonal part: bb1 = bb1 - B*x */ 2574 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2575 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 2576 2577 ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2578 2579 /* diagonal sweep */ 2580 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2581 } 2582 ierr = VecDestroy(lvec1);CHKERRQ(ierr); 2583 ierr = VecDestroy(bb1);CHKERRQ(ierr); 2584 } else { 2585 SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2586 } 2587 PetscFunctionReturn(0); 2588 } 2589 2590