1 #define PETSCMAT_DLL 2 3 #include "src/mat/impls/baij/mpi/mpibaij.h" /*I "petscmat.h" I*/ 4 #include "mpisbaij.h" 5 #include "src/mat/impls/sbaij/seq/sbaij.h" 6 7 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat); 8 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat); 9 EXTERN PetscErrorCode DisAssemble_MPISBAIJ(Mat); 10 EXTERN PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt); 11 EXTERN PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []); 12 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []); 13 EXTERN PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode); 14 EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 15 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 16 EXTERN PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**); 17 EXTERN PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**); 18 EXTERN PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*); 19 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *); 20 EXTERN PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat,Vec,PetscInt[]); 21 EXTERN PetscErrorCode MatRelax_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec); 22 23 /* UGLY, ugly, ugly 24 When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does 25 not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and 26 inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ() 27 converts the entries into single precision and then calls ..._MatScalar() to put them 28 into the single precision data structures. 29 */ 30 #if defined(PETSC_USE_MAT_SINGLE) 31 EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode); 32 EXTERN PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode); 33 EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode); 34 EXTERN PetscErrorCode MatSetValues_MPISBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode); 35 EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode); 36 #else 37 #define MatSetValuesBlocked_SeqSBAIJ_MatScalar MatSetValuesBlocked_SeqSBAIJ 38 #define MatSetValues_MPISBAIJ_MatScalar MatSetValues_MPISBAIJ 39 #define MatSetValuesBlocked_MPISBAIJ_MatScalar MatSetValuesBlocked_MPISBAIJ 40 #define MatSetValues_MPISBAIJ_HT_MatScalar MatSetValues_MPISBAIJ_HT 41 #define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar MatSetValuesBlocked_MPISBAIJ_HT 42 #endif 43 44 EXTERN_C_BEGIN 45 #undef __FUNCT__ 46 #define __FUNCT__ "MatStoreValues_MPISBAIJ" 47 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPISBAIJ(Mat mat) 48 { 49 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 50 PetscErrorCode ierr; 51 52 PetscFunctionBegin; 53 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 54 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 55 PetscFunctionReturn(0); 56 } 57 EXTERN_C_END 58 59 EXTERN_C_BEGIN 60 #undef __FUNCT__ 61 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ" 62 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPISBAIJ(Mat mat) 63 { 64 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 65 PetscErrorCode ierr; 66 67 PetscFunctionBegin; 68 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 69 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 70 PetscFunctionReturn(0); 71 } 72 EXTERN_C_END 73 74 75 #define CHUNKSIZE 10 76 77 #define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \ 78 { \ 79 \ 80 brow = row/bs; \ 81 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 82 rmax = aimax[brow]; nrow = ailen[brow]; \ 83 bcol = col/bs; \ 84 ridx = row % bs; cidx = col % bs; \ 85 low = 0; high = nrow; \ 86 while (high-low > 3) { \ 87 t = (low+high)/2; \ 88 if (rp[t] > bcol) high = t; \ 89 else low = t; \ 90 } \ 91 for (_i=low; _i<high; _i++) { \ 92 if (rp[_i] > bcol) break; \ 93 if (rp[_i] == bcol) { \ 94 bap = ap + bs2*_i + bs*cidx + ridx; \ 95 if (addv == ADD_VALUES) *bap += value; \ 96 else *bap = value; \ 97 goto a_noinsert; \ 98 } \ 99 } \ 100 if (a->nonew == 1) goto a_noinsert; \ 101 if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 102 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 103 N = nrow++ - 1; \ 104 /* shift up all the later entries in this row */ \ 105 for (ii=N; ii>=_i; ii--) { \ 106 rp[ii+1] = rp[ii]; \ 107 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 108 } \ 109 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 110 rp[_i] = bcol; \ 111 ap[bs2*_i + bs*cidx + ridx] = value; \ 112 a_noinsert:; \ 113 ailen[brow] = nrow; \ 114 } 115 #ifndef MatSetValues_SeqBAIJ_B_Private 116 #define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \ 117 { \ 118 brow = row/bs; \ 119 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 120 rmax = bimax[brow]; nrow = bilen[brow]; \ 121 bcol = col/bs; \ 122 ridx = row % bs; cidx = col % bs; \ 123 low = 0; high = nrow; \ 124 while (high-low > 3) { \ 125 t = (low+high)/2; \ 126 if (rp[t] > bcol) high = t; \ 127 else low = t; \ 128 } \ 129 for (_i=low; _i<high; _i++) { \ 130 if (rp[_i] > bcol) break; \ 131 if (rp[_i] == bcol) { \ 132 bap = ap + bs2*_i + bs*cidx + ridx; \ 133 if (addv == ADD_VALUES) *bap += value; \ 134 else *bap = value; \ 135 goto b_noinsert; \ 136 } \ 137 } \ 138 if (b->nonew == 1) goto b_noinsert; \ 139 if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 140 MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 141 N = nrow++ - 1; \ 142 /* shift up all the later entries in this row */ \ 143 for (ii=N; ii>=_i; ii--) { \ 144 rp[ii+1] = rp[ii]; \ 145 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 146 } \ 147 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 148 rp[_i] = bcol; \ 149 ap[bs2*_i + bs*cidx + ridx] = value; \ 150 b_noinsert:; \ 151 bilen[brow] = nrow; \ 152 } 153 #endif 154 155 #if defined(PETSC_USE_MAT_SINGLE) 156 #undef __FUNCT__ 157 #define __FUNCT__ "MatSetValues_MPISBAIJ" 158 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 159 { 160 Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data; 161 PetscErrorCode ierr; 162 PetscInt i,N = m*n; 163 MatScalar *vsingle; 164 165 PetscFunctionBegin; 166 if (N > b->setvalueslen) { 167 ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr); 168 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 169 b->setvalueslen = N; 170 } 171 vsingle = b->setvaluescopy; 172 173 for (i=0; i<N; i++) { 174 vsingle[i] = v[i]; 175 } 176 ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 177 PetscFunctionReturn(0); 178 } 179 180 #undef __FUNCT__ 181 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ" 182 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 183 { 184 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 185 PetscErrorCode ierr; 186 PetscInt i,N = m*n*b->bs2; 187 MatScalar *vsingle; 188 189 PetscFunctionBegin; 190 if (N > b->setvalueslen) { 191 ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr); 192 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 193 b->setvalueslen = N; 194 } 195 vsingle = b->setvaluescopy; 196 for (i=0; i<N; i++) { 197 vsingle[i] = v[i]; 198 } 199 ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 200 PetscFunctionReturn(0); 201 } 202 #endif 203 204 /* Only add/insert a(i,j) with i<=j (blocks). 205 Any a(i,j) with i>j input by user is ingored. 206 */ 207 #undef __FUNCT__ 208 #define __FUNCT__ "MatSetValues_MPISBAIJ_MatScalar" 209 PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 210 { 211 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 212 MatScalar value; 213 PetscTruth roworiented = baij->roworiented; 214 PetscErrorCode ierr; 215 PetscInt i,j,row,col; 216 PetscInt rstart_orig=mat->rmap.rstart; 217 PetscInt rend_orig=mat->rmap.rend,cstart_orig=mat->cmap.rstart; 218 PetscInt cend_orig=mat->cmap.rend,bs=mat->rmap.bs; 219 220 /* Some Variables required in the macro */ 221 Mat A = baij->A; 222 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data; 223 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 224 MatScalar *aa=a->a; 225 226 Mat B = baij->B; 227 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 228 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 229 MatScalar *ba=b->a; 230 231 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 232 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 233 MatScalar *ap,*bap; 234 235 /* for stash */ 236 PetscInt n_loc, *in_loc = PETSC_NULL; 237 MatScalar *v_loc = PETSC_NULL; 238 239 PetscFunctionBegin; 240 241 if (!baij->donotstash){ 242 if (n > baij->n_loc) { 243 ierr = PetscFree(baij->in_loc);CHKERRQ(ierr); 244 ierr = PetscFree(baij->v_loc);CHKERRQ(ierr); 245 ierr = PetscMalloc(n*sizeof(PetscInt),&baij->in_loc);CHKERRQ(ierr); 246 ierr = PetscMalloc(n*sizeof(MatScalar),&baij->v_loc);CHKERRQ(ierr); 247 baij->n_loc = n; 248 } 249 in_loc = baij->in_loc; 250 v_loc = baij->v_loc; 251 } 252 253 for (i=0; i<m; i++) { 254 if (im[i] < 0) continue; 255 #if defined(PETSC_USE_DEBUG) 256 if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1); 257 #endif 258 if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */ 259 row = im[i] - rstart_orig; /* local row index */ 260 for (j=0; j<n; j++) { 261 if (im[i]/bs > in[j]/bs){ 262 if (a->ignore_ltriangular){ 263 continue; /* ignore lower triangular blocks */ 264 } else { 265 SETERRQ(PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR)"); 266 } 267 } 268 if (in[j] >= cstart_orig && in[j] < cend_orig){ /* diag entry (A) */ 269 col = in[j] - cstart_orig; /* local col index */ 270 brow = row/bs; bcol = col/bs; 271 if (brow > bcol) continue; /* ignore lower triangular blocks of A */ 272 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 273 MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv); 274 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 275 } else if (in[j] < 0) continue; 276 #if defined(PETSC_USE_DEBUG) 277 else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);} 278 #endif 279 else { /* off-diag entry (B) */ 280 if (mat->was_assembled) { 281 if (!baij->colmap) { 282 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 283 } 284 #if defined (PETSC_USE_CTABLE) 285 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 286 col = col - 1; 287 #else 288 col = baij->colmap[in[j]/bs] - 1; 289 #endif 290 if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) { 291 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 292 col = in[j]; 293 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 294 B = baij->B; 295 b = (Mat_SeqBAIJ*)(B)->data; 296 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 297 ba=b->a; 298 } else col += in[j]%bs; 299 } else col = in[j]; 300 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 301 MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv); 302 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 303 } 304 } 305 } else { /* off processor entry */ 306 if (!baij->donotstash) { 307 n_loc = 0; 308 for (j=0; j<n; j++){ 309 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 310 in_loc[n_loc] = in[j]; 311 if (roworiented) { 312 v_loc[n_loc] = v[i*n+j]; 313 } else { 314 v_loc[n_loc] = v[j*m+i]; 315 } 316 n_loc++; 317 } 318 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr); 319 } 320 } 321 } 322 PetscFunctionReturn(0); 323 } 324 325 #undef __FUNCT__ 326 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_MatScalar" 327 PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 328 { 329 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 330 const MatScalar *value; 331 MatScalar *barray=baij->barray; 332 PetscTruth roworiented = baij->roworiented; 333 PetscErrorCode ierr; 334 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 335 PetscInt rend=baij->rendbs,cstart=baij->rstartbs,stepval; 336 PetscInt cend=baij->rendbs,bs=mat->rmap.bs,bs2=baij->bs2; 337 338 PetscFunctionBegin; 339 if(!barray) { 340 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 341 baij->barray = barray; 342 } 343 344 if (roworiented) { 345 stepval = (n-1)*bs; 346 } else { 347 stepval = (m-1)*bs; 348 } 349 for (i=0; i<m; i++) { 350 if (im[i] < 0) continue; 351 #if defined(PETSC_USE_DEBUG) 352 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 353 #endif 354 if (im[i] >= rstart && im[i] < rend) { 355 row = im[i] - rstart; 356 for (j=0; j<n; j++) { 357 /* If NumCol = 1 then a copy is not required */ 358 if ((roworiented) && (n == 1)) { 359 barray = (MatScalar*) v + i*bs2; 360 } else if((!roworiented) && (m == 1)) { 361 barray = (MatScalar*) v + j*bs2; 362 } else { /* Here a copy is required */ 363 if (roworiented) { 364 value = v + i*(stepval+bs)*bs + j*bs; 365 } else { 366 value = v + j*(stepval+bs)*bs + i*bs; 367 } 368 for (ii=0; ii<bs; ii++,value+=stepval) { 369 for (jj=0; jj<bs; jj++) { 370 *barray++ = *value++; 371 } 372 } 373 barray -=bs2; 374 } 375 376 if (in[j] >= cstart && in[j] < cend){ 377 col = in[j] - cstart; 378 ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 379 } 380 else if (in[j] < 0) continue; 381 #if defined(PETSC_USE_DEBUG) 382 else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);} 383 #endif 384 else { 385 if (mat->was_assembled) { 386 if (!baij->colmap) { 387 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 388 } 389 390 #if defined(PETSC_USE_DEBUG) 391 #if defined (PETSC_USE_CTABLE) 392 { PetscInt data; 393 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 394 if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 395 } 396 #else 397 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 398 #endif 399 #endif 400 #if defined (PETSC_USE_CTABLE) 401 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 402 col = (col - 1)/bs; 403 #else 404 col = (baij->colmap[in[j]] - 1)/bs; 405 #endif 406 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 407 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 408 col = in[j]; 409 } 410 } 411 else col = in[j]; 412 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 413 } 414 } 415 } else { 416 if (!baij->donotstash) { 417 if (roworiented) { 418 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 419 } else { 420 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 421 } 422 } 423 } 424 } 425 PetscFunctionReturn(0); 426 } 427 428 #undef __FUNCT__ 429 #define __FUNCT__ "MatGetValues_MPISBAIJ" 430 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 431 { 432 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 433 PetscErrorCode ierr; 434 PetscInt bs=mat->rmap.bs,i,j,bsrstart = mat->rmap.rstart,bsrend = mat->rmap.rend; 435 PetscInt bscstart = mat->cmap.rstart,bscend = mat->cmap.rend,row,col,data; 436 437 PetscFunctionBegin; 438 for (i=0; i<m; i++) { 439 if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); 440 if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1); 441 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 442 row = idxm[i] - bsrstart; 443 for (j=0; j<n; j++) { 444 if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); 445 if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1); 446 if (idxn[j] >= bscstart && idxn[j] < bscend){ 447 col = idxn[j] - bscstart; 448 ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 449 } else { 450 if (!baij->colmap) { 451 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 452 } 453 #if defined (PETSC_USE_CTABLE) 454 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 455 data --; 456 #else 457 data = baij->colmap[idxn[j]/bs]-1; 458 #endif 459 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 460 else { 461 col = data + idxn[j]%bs; 462 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 463 } 464 } 465 } 466 } else { 467 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 468 } 469 } 470 PetscFunctionReturn(0); 471 } 472 473 #undef __FUNCT__ 474 #define __FUNCT__ "MatNorm_MPISBAIJ" 475 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm) 476 { 477 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 478 PetscErrorCode ierr; 479 PetscReal sum[2],*lnorm2; 480 481 PetscFunctionBegin; 482 if (baij->size == 1) { 483 ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr); 484 } else { 485 if (type == NORM_FROBENIUS) { 486 ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr); 487 ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr); 488 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++; /* squar power of norm(A) */ 489 ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr); 490 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--; /* squar power of norm(B) */ 491 ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 492 *norm = sqrt(sum[0] + 2*sum[1]); 493 ierr = PetscFree(lnorm2);CHKERRQ(ierr); 494 } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */ 495 Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data; 496 Mat_SeqBAIJ *bmat=(Mat_SeqBAIJ*)baij->B->data; 497 PetscReal *rsum,*rsum2,vabs; 498 PetscInt *jj,*garray=baij->garray,rstart=baij->rstartbs,nz; 499 PetscInt brow,bcol,col,bs=baij->A->rmap.bs,row,grow,gcol,mbs=amat->mbs; 500 MatScalar *v; 501 502 ierr = PetscMalloc((2*mat->cmap.N+1)*sizeof(PetscReal),&rsum);CHKERRQ(ierr); 503 rsum2 = rsum + mat->cmap.N; 504 ierr = PetscMemzero(rsum,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr); 505 /* Amat */ 506 v = amat->a; jj = amat->j; 507 for (brow=0; brow<mbs; brow++) { 508 grow = bs*(rstart + brow); 509 nz = amat->i[brow+1] - amat->i[brow]; 510 for (bcol=0; bcol<nz; bcol++){ 511 gcol = bs*(rstart + *jj); jj++; 512 for (col=0; col<bs; col++){ 513 for (row=0; row<bs; row++){ 514 vabs = PetscAbsScalar(*v); v++; 515 rsum[gcol+col] += vabs; 516 /* non-diagonal block */ 517 if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs; 518 } 519 } 520 } 521 } 522 /* Bmat */ 523 v = bmat->a; jj = bmat->j; 524 for (brow=0; brow<mbs; brow++) { 525 grow = bs*(rstart + brow); 526 nz = bmat->i[brow+1] - bmat->i[brow]; 527 for (bcol=0; bcol<nz; bcol++){ 528 gcol = bs*garray[*jj]; jj++; 529 for (col=0; col<bs; col++){ 530 for (row=0; row<bs; row++){ 531 vabs = PetscAbsScalar(*v); v++; 532 rsum[gcol+col] += vabs; 533 rsum[grow+row] += vabs; 534 } 535 } 536 } 537 } 538 ierr = MPI_Allreduce(rsum,rsum2,mat->cmap.N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 539 *norm = 0.0; 540 for (col=0; col<mat->cmap.N; col++) { 541 if (rsum2[col] > *norm) *norm = rsum2[col]; 542 } 543 ierr = PetscFree(rsum);CHKERRQ(ierr); 544 } else { 545 SETERRQ(PETSC_ERR_SUP,"No support for this norm yet"); 546 } 547 } 548 PetscFunctionReturn(0); 549 } 550 551 #undef __FUNCT__ 552 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ" 553 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode) 554 { 555 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 556 PetscErrorCode ierr; 557 PetscInt nstash,reallocs; 558 InsertMode addv; 559 560 PetscFunctionBegin; 561 if (baij->donotstash) { 562 PetscFunctionReturn(0); 563 } 564 565 /* make sure all processors are either in INSERTMODE or ADDMODE */ 566 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 567 if (addv == (ADD_VALUES|INSERT_VALUES)) { 568 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 569 } 570 mat->insertmode = addv; /* in case this processor had no cache */ 571 572 ierr = MatStashScatterBegin_Private(&mat->stash,mat->rmap.range);CHKERRQ(ierr); 573 ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rangebs);CHKERRQ(ierr); 574 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 575 ierr = PetscInfo2(0,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 576 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 577 ierr = PetscInfo2(0,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 578 PetscFunctionReturn(0); 579 } 580 581 #undef __FUNCT__ 582 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ" 583 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode) 584 { 585 Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data; 586 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)baij->A->data; 587 PetscErrorCode ierr; 588 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 589 PetscInt *row,*col,other_disassembled; 590 PetscMPIInt n; 591 PetscTruth r1,r2,r3; 592 MatScalar *val; 593 InsertMode addv = mat->insertmode; 594 595 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 596 PetscFunctionBegin; 597 598 if (!baij->donotstash) { 599 while (1) { 600 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 601 if (!flg) break; 602 603 for (i=0; i<n;) { 604 /* Now identify the consecutive vals belonging to the same row */ 605 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 606 if (j < n) ncols = j-i; 607 else ncols = n-i; 608 /* Now assemble all these values with a single function call */ 609 ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 610 i = j; 611 } 612 } 613 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 614 /* Now process the block-stash. Since the values are stashed column-oriented, 615 set the roworiented flag to column oriented, and after MatSetValues() 616 restore the original flags */ 617 r1 = baij->roworiented; 618 r2 = a->roworiented; 619 r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented; 620 baij->roworiented = PETSC_FALSE; 621 a->roworiented = PETSC_FALSE; 622 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworinted */ 623 while (1) { 624 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 625 if (!flg) break; 626 627 for (i=0; i<n;) { 628 /* Now identify the consecutive vals belonging to the same row */ 629 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 630 if (j < n) ncols = j-i; 631 else ncols = n-i; 632 ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 633 i = j; 634 } 635 } 636 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 637 baij->roworiented = r1; 638 a->roworiented = r2; 639 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworinted */ 640 } 641 642 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 643 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 644 645 /* determine if any processor has disassembled, if so we must 646 also disassemble ourselfs, in order that we may reassemble. */ 647 /* 648 if nonzero structure of submatrix B cannot change then we know that 649 no processor disassembled thus we can skip this stuff 650 */ 651 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 652 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 653 if (mat->was_assembled && !other_disassembled) { 654 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 655 } 656 } 657 658 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 659 ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */ 660 } 661 ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */ 662 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 663 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 664 665 ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr); 666 baij->rowvalues = 0; 667 668 PetscFunctionReturn(0); 669 } 670 671 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode); 672 #undef __FUNCT__ 673 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket" 674 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 675 { 676 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 677 PetscErrorCode ierr; 678 PetscInt bs = mat->rmap.bs; 679 PetscMPIInt size = baij->size,rank = baij->rank; 680 PetscTruth iascii,isdraw; 681 PetscViewer sviewer; 682 PetscViewerFormat format; 683 684 PetscFunctionBegin; 685 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 686 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 687 if (iascii) { 688 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 689 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 690 MatInfo info; 691 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 692 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 693 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n", 694 rank,mat->rmap.N,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs, 695 mat->rmap.bs,(PetscInt)info.memory);CHKERRQ(ierr); 696 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 697 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr); 698 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 699 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr); 700 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 701 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 702 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 703 PetscFunctionReturn(0); 704 } else if (format == PETSC_VIEWER_ASCII_INFO) { 705 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 706 PetscFunctionReturn(0); 707 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 708 PetscFunctionReturn(0); 709 } 710 } 711 712 if (isdraw) { 713 PetscDraw draw; 714 PetscTruth isnull; 715 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 716 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 717 } 718 719 if (size == 1) { 720 ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr); 721 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 722 } else { 723 /* assemble the entire matrix onto first processor. */ 724 Mat A; 725 Mat_SeqSBAIJ *Aloc; 726 Mat_SeqBAIJ *Bloc; 727 PetscInt M = mat->rmap.N,N = mat->cmap.N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 728 MatScalar *a; 729 730 /* Should this be the same type as mat? */ 731 ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr); 732 if (!rank) { 733 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 734 } else { 735 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 736 } 737 ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 738 ierr = MatMPISBAIJSetPreallocation(A,mat->rmap.bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 739 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 740 741 /* copy over the A part */ 742 Aloc = (Mat_SeqSBAIJ*)baij->A->data; 743 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 744 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 745 746 for (i=0; i<mbs; i++) { 747 rvals[0] = bs*(baij->rstartbs + i); 748 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 749 for (j=ai[i]; j<ai[i+1]; j++) { 750 col = (baij->cstartbs+aj[j])*bs; 751 for (k=0; k<bs; k++) { 752 ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 753 col++; a += bs; 754 } 755 } 756 } 757 /* copy over the B part */ 758 Bloc = (Mat_SeqBAIJ*)baij->B->data; 759 ai = Bloc->i; aj = Bloc->j; a = Bloc->a; 760 for (i=0; i<mbs; i++) { 761 762 rvals[0] = bs*(baij->rstartbs + i); 763 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 764 for (j=ai[i]; j<ai[i+1]; j++) { 765 col = baij->garray[aj[j]]*bs; 766 for (k=0; k<bs; k++) { 767 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 768 col++; a += bs; 769 } 770 } 771 } 772 ierr = PetscFree(rvals);CHKERRQ(ierr); 773 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 774 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 775 /* 776 Everyone has to call to draw the matrix since the graphics waits are 777 synchronized across all processors that share the PetscDraw object 778 */ 779 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 780 if (!rank) { 781 ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 782 ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 783 } 784 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 785 ierr = MatDestroy(A);CHKERRQ(ierr); 786 } 787 PetscFunctionReturn(0); 788 } 789 790 #undef __FUNCT__ 791 #define __FUNCT__ "MatView_MPISBAIJ" 792 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer) 793 { 794 PetscErrorCode ierr; 795 PetscTruth iascii,isdraw,issocket,isbinary; 796 797 PetscFunctionBegin; 798 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 799 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 800 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 801 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 802 if (iascii || isdraw || issocket || isbinary) { 803 ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 804 } else { 805 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name); 806 } 807 PetscFunctionReturn(0); 808 } 809 810 #undef __FUNCT__ 811 #define __FUNCT__ "MatDestroy_MPISBAIJ" 812 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat) 813 { 814 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 815 PetscErrorCode ierr; 816 817 PetscFunctionBegin; 818 #if defined(PETSC_USE_LOG) 819 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap.N,mat->cmap.N); 820 #endif 821 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 822 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 823 ierr = MatDestroy(baij->A);CHKERRQ(ierr); 824 ierr = MatDestroy(baij->B);CHKERRQ(ierr); 825 #if defined (PETSC_USE_CTABLE) 826 if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);} 827 #else 828 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 829 #endif 830 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 831 if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 832 if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 833 if (baij->slvec0) { 834 ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr); 835 ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr); 836 } 837 if (baij->slvec1) { 838 ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr); 839 ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr); 840 ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr); 841 } 842 if (baij->sMvctx) {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);} 843 ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr); 844 ierr = PetscFree(baij->barray);CHKERRQ(ierr); 845 ierr = PetscFree(baij->hd);CHKERRQ(ierr); 846 #if defined(PETSC_USE_MAT_SINGLE) 847 ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr); 848 #endif 849 ierr = PetscFree(baij->in_loc);CHKERRQ(ierr); 850 ierr = PetscFree(baij->v_loc);CHKERRQ(ierr); 851 ierr = PetscFree(baij->rangebs);CHKERRQ(ierr); 852 ierr = PetscFree(baij);CHKERRQ(ierr); 853 854 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 855 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 856 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 857 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 858 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 859 PetscFunctionReturn(0); 860 } 861 862 #undef __FUNCT__ 863 #define __FUNCT__ "MatMult_MPISBAIJ" 864 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy) 865 { 866 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 867 PetscErrorCode ierr; 868 PetscInt nt,mbs=a->mbs,bs=A->rmap.bs; 869 PetscScalar *x,*from,zero=0.0; 870 871 PetscFunctionBegin; 872 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 873 if (nt != A->cmap.n) { 874 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 875 } 876 877 /* diagonal part */ 878 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 879 ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr); 880 881 /* subdiagonal part */ 882 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 883 CHKMEMQ; 884 /* copy x into the vec slvec0 */ 885 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 886 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 887 CHKMEMQ; 888 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 889 CHKMEMQ; 890 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 891 892 CHKMEMQ; 893 ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr); 894 CHKMEMQ; 895 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 896 CHKMEMQ; 897 ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr); 898 CHKMEMQ; 899 /* supperdiagonal part */ 900 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 901 CHKMEMQ; 902 PetscFunctionReturn(0); 903 } 904 905 #undef __FUNCT__ 906 #define __FUNCT__ "MatMult_MPISBAIJ_2comm" 907 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy) 908 { 909 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 910 PetscErrorCode ierr; 911 PetscInt nt; 912 913 PetscFunctionBegin; 914 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 915 if (nt != A->cmap.n) { 916 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 917 } 918 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 919 if (nt != A->rmap.N) { 920 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 921 } 922 923 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 924 /* do diagonal part */ 925 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 926 /* do supperdiagonal part */ 927 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 928 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 929 /* do subdiagonal part */ 930 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 931 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 932 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 933 934 PetscFunctionReturn(0); 935 } 936 937 #undef __FUNCT__ 938 #define __FUNCT__ "MatMultAdd_MPISBAIJ" 939 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 940 { 941 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 942 PetscErrorCode ierr; 943 PetscInt mbs=a->mbs,bs=A->rmap.bs; 944 PetscScalar *x,*from,zero=0.0; 945 946 PetscFunctionBegin; 947 /* 948 PetscSynchronizedPrintf(A->comm," MatMultAdd is called ...\n"); 949 PetscSynchronizedFlush(A->comm); 950 */ 951 /* diagonal part */ 952 ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr); 953 ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr); 954 955 /* subdiagonal part */ 956 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 957 958 /* copy x into the vec slvec0 */ 959 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 960 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 961 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 962 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 963 964 ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr); 965 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 966 ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr); 967 968 /* supperdiagonal part */ 969 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr); 970 971 PetscFunctionReturn(0); 972 } 973 974 #undef __FUNCT__ 975 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm" 976 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz) 977 { 978 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 979 PetscErrorCode ierr; 980 981 PetscFunctionBegin; 982 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 983 /* do diagonal part */ 984 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 985 /* do supperdiagonal part */ 986 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 987 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 988 989 /* do subdiagonal part */ 990 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 991 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 992 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 993 994 PetscFunctionReturn(0); 995 } 996 997 /* 998 This only works correctly for square matrices where the subblock A->A is the 999 diagonal block 1000 */ 1001 #undef __FUNCT__ 1002 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ" 1003 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v) 1004 { 1005 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1006 PetscErrorCode ierr; 1007 1008 PetscFunctionBegin; 1009 /* if (a->rmap.N != a->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */ 1010 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1011 PetscFunctionReturn(0); 1012 } 1013 1014 #undef __FUNCT__ 1015 #define __FUNCT__ "MatScale_MPISBAIJ" 1016 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa) 1017 { 1018 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1019 PetscErrorCode ierr; 1020 1021 PetscFunctionBegin; 1022 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1023 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1024 PetscFunctionReturn(0); 1025 } 1026 1027 #undef __FUNCT__ 1028 #define __FUNCT__ "MatGetRow_MPISBAIJ" 1029 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1030 { 1031 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 1032 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1033 PetscErrorCode ierr; 1034 PetscInt bs = matin->rmap.bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1035 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap.rstart,brend = matin->rmap.rend; 1036 PetscInt *cmap,*idx_p,cstart = mat->rstartbs; 1037 1038 PetscFunctionBegin; 1039 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1040 mat->getrowactive = PETSC_TRUE; 1041 1042 if (!mat->rowvalues && (idx || v)) { 1043 /* 1044 allocate enough space to hold information from the longest row. 1045 */ 1046 Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data; 1047 Mat_SeqBAIJ *Ba = (Mat_SeqBAIJ*)mat->B->data; 1048 PetscInt max = 1,mbs = mat->mbs,tmp; 1049 for (i=0; i<mbs; i++) { 1050 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */ 1051 if (max < tmp) { max = tmp; } 1052 } 1053 ierr = PetscMalloc(max*bs2*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1054 mat->rowindices = (PetscInt*)(mat->rowvalues + max*bs2); 1055 } 1056 1057 if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows") 1058 lrow = row - brstart; /* local row index */ 1059 1060 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1061 if (!v) {pvA = 0; pvB = 0;} 1062 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1063 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1064 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1065 nztot = nzA + nzB; 1066 1067 cmap = mat->garray; 1068 if (v || idx) { 1069 if (nztot) { 1070 /* Sort by increasing column numbers, assuming A and B already sorted */ 1071 PetscInt imark = -1; 1072 if (v) { 1073 *v = v_p = mat->rowvalues; 1074 for (i=0; i<nzB; i++) { 1075 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1076 else break; 1077 } 1078 imark = i; 1079 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1080 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1081 } 1082 if (idx) { 1083 *idx = idx_p = mat->rowindices; 1084 if (imark > -1) { 1085 for (i=0; i<imark; i++) { 1086 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1087 } 1088 } else { 1089 for (i=0; i<nzB; i++) { 1090 if (cmap[cworkB[i]/bs] < cstart) 1091 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1092 else break; 1093 } 1094 imark = i; 1095 } 1096 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1097 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1098 } 1099 } else { 1100 if (idx) *idx = 0; 1101 if (v) *v = 0; 1102 } 1103 } 1104 *nz = nztot; 1105 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1106 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1107 PetscFunctionReturn(0); 1108 } 1109 1110 #undef __FUNCT__ 1111 #define __FUNCT__ "MatRestoreRow_MPISBAIJ" 1112 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1113 { 1114 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1115 1116 PetscFunctionBegin; 1117 if (!baij->getrowactive) { 1118 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1119 } 1120 baij->getrowactive = PETSC_FALSE; 1121 PetscFunctionReturn(0); 1122 } 1123 1124 #undef __FUNCT__ 1125 #define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ" 1126 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A) 1127 { 1128 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1129 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1130 1131 PetscFunctionBegin; 1132 aA->getrow_utriangular = PETSC_TRUE; 1133 PetscFunctionReturn(0); 1134 } 1135 #undef __FUNCT__ 1136 #define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ" 1137 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A) 1138 { 1139 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1140 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1141 1142 PetscFunctionBegin; 1143 aA->getrow_utriangular = PETSC_FALSE; 1144 PetscFunctionReturn(0); 1145 } 1146 1147 #undef __FUNCT__ 1148 #define __FUNCT__ "MatRealPart_MPISBAIJ" 1149 PetscErrorCode MatRealPart_MPISBAIJ(Mat A) 1150 { 1151 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1152 PetscErrorCode ierr; 1153 1154 PetscFunctionBegin; 1155 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1156 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1157 PetscFunctionReturn(0); 1158 } 1159 1160 #undef __FUNCT__ 1161 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ" 1162 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A) 1163 { 1164 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1165 PetscErrorCode ierr; 1166 1167 PetscFunctionBegin; 1168 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1169 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1170 PetscFunctionReturn(0); 1171 } 1172 1173 #undef __FUNCT__ 1174 #define __FUNCT__ "MatZeroEntries_MPISBAIJ" 1175 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A) 1176 { 1177 Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data; 1178 PetscErrorCode ierr; 1179 1180 PetscFunctionBegin; 1181 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1182 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1183 PetscFunctionReturn(0); 1184 } 1185 1186 #undef __FUNCT__ 1187 #define __FUNCT__ "MatGetInfo_MPISBAIJ" 1188 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1189 { 1190 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data; 1191 Mat A = a->A,B = a->B; 1192 PetscErrorCode ierr; 1193 PetscReal isend[5],irecv[5]; 1194 1195 PetscFunctionBegin; 1196 info->block_size = (PetscReal)matin->rmap.bs; 1197 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1198 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1199 isend[3] = info->memory; isend[4] = info->mallocs; 1200 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1201 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1202 isend[3] += info->memory; isend[4] += info->mallocs; 1203 if (flag == MAT_LOCAL) { 1204 info->nz_used = isend[0]; 1205 info->nz_allocated = isend[1]; 1206 info->nz_unneeded = isend[2]; 1207 info->memory = isend[3]; 1208 info->mallocs = isend[4]; 1209 } else if (flag == MAT_GLOBAL_MAX) { 1210 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1211 info->nz_used = irecv[0]; 1212 info->nz_allocated = irecv[1]; 1213 info->nz_unneeded = irecv[2]; 1214 info->memory = irecv[3]; 1215 info->mallocs = irecv[4]; 1216 } else if (flag == MAT_GLOBAL_SUM) { 1217 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1218 info->nz_used = irecv[0]; 1219 info->nz_allocated = irecv[1]; 1220 info->nz_unneeded = irecv[2]; 1221 info->memory = irecv[3]; 1222 info->mallocs = irecv[4]; 1223 } else { 1224 SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1225 } 1226 info->rows_global = (PetscReal)A->rmap.N; 1227 info->columns_global = (PetscReal)A->cmap.N; 1228 info->rows_local = (PetscReal)A->rmap.N; 1229 info->columns_local = (PetscReal)A->cmap.N; 1230 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1231 info->fill_ratio_needed = 0; 1232 info->factor_mallocs = 0; 1233 PetscFunctionReturn(0); 1234 } 1235 1236 #undef __FUNCT__ 1237 #define __FUNCT__ "MatSetOption_MPISBAIJ" 1238 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op) 1239 { 1240 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1241 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1242 PetscErrorCode ierr; 1243 1244 PetscFunctionBegin; 1245 switch (op) { 1246 case MAT_NO_NEW_NONZERO_LOCATIONS: 1247 case MAT_YES_NEW_NONZERO_LOCATIONS: 1248 case MAT_COLUMNS_UNSORTED: 1249 case MAT_COLUMNS_SORTED: 1250 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1251 case MAT_KEEP_ZEROED_ROWS: 1252 case MAT_NEW_NONZERO_LOCATION_ERR: 1253 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1254 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1255 break; 1256 case MAT_ROW_ORIENTED: 1257 a->roworiented = PETSC_TRUE; 1258 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1259 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1260 break; 1261 case MAT_ROWS_SORTED: 1262 case MAT_ROWS_UNSORTED: 1263 case MAT_YES_NEW_DIAGONALS: 1264 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1265 break; 1266 case MAT_COLUMN_ORIENTED: 1267 a->roworiented = PETSC_FALSE; 1268 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1269 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1270 break; 1271 case MAT_IGNORE_OFF_PROC_ENTRIES: 1272 a->donotstash = PETSC_TRUE; 1273 break; 1274 case MAT_NO_NEW_DIAGONALS: 1275 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1276 case MAT_USE_HASH_TABLE: 1277 a->ht_flag = PETSC_TRUE; 1278 break; 1279 case MAT_NOT_SYMMETRIC: 1280 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 1281 case MAT_HERMITIAN: 1282 SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric"); 1283 case MAT_SYMMETRIC: 1284 case MAT_STRUCTURALLY_SYMMETRIC: 1285 case MAT_NOT_HERMITIAN: 1286 case MAT_SYMMETRY_ETERNAL: 1287 case MAT_NOT_SYMMETRY_ETERNAL: 1288 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1289 break; 1290 case MAT_IGNORE_LOWER_TRIANGULAR: 1291 aA->ignore_ltriangular = PETSC_TRUE; 1292 break; 1293 case MAT_ERROR_LOWER_TRIANGULAR: 1294 aA->ignore_ltriangular = PETSC_FALSE; 1295 break; 1296 case MAT_GETROW_UPPERTRIANGULAR: 1297 aA->getrow_utriangular = PETSC_TRUE; 1298 break; 1299 default: 1300 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 1301 } 1302 PetscFunctionReturn(0); 1303 } 1304 1305 #undef __FUNCT__ 1306 #define __FUNCT__ "MatTranspose_MPISBAIJ" 1307 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,Mat *B) 1308 { 1309 PetscErrorCode ierr; 1310 PetscFunctionBegin; 1311 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 1312 PetscFunctionReturn(0); 1313 } 1314 1315 #undef __FUNCT__ 1316 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ" 1317 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr) 1318 { 1319 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1320 Mat a=baij->A, b=baij->B; 1321 PetscErrorCode ierr; 1322 PetscInt nv,m,n; 1323 PetscTruth flg; 1324 1325 PetscFunctionBegin; 1326 if (ll != rr){ 1327 ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr); 1328 if (!flg) 1329 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n"); 1330 } 1331 if (!ll) PetscFunctionReturn(0); 1332 1333 ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr); 1334 if (m != n) SETERRQ2(PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n); 1335 1336 ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr); 1337 if (nv!=n) SETERRQ(PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size"); 1338 1339 ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1340 1341 /* left diagonalscale the off-diagonal part */ 1342 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1343 1344 /* scale the diagonal part */ 1345 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1346 1347 /* right diagonalscale the off-diagonal part */ 1348 ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1349 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1350 PetscFunctionReturn(0); 1351 } 1352 1353 #undef __FUNCT__ 1354 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ" 1355 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A) 1356 { 1357 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1358 PetscErrorCode ierr; 1359 1360 PetscFunctionBegin; 1361 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1362 PetscFunctionReturn(0); 1363 } 1364 1365 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *); 1366 1367 #undef __FUNCT__ 1368 #define __FUNCT__ "MatEqual_MPISBAIJ" 1369 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag) 1370 { 1371 Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data; 1372 Mat a,b,c,d; 1373 PetscTruth flg; 1374 PetscErrorCode ierr; 1375 1376 PetscFunctionBegin; 1377 a = matA->A; b = matA->B; 1378 c = matB->A; d = matB->B; 1379 1380 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1381 if (flg) { 1382 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1383 } 1384 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1385 PetscFunctionReturn(0); 1386 } 1387 1388 #undef __FUNCT__ 1389 #define __FUNCT__ "MatCopy_MPISBAIJ" 1390 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str) 1391 { 1392 PetscErrorCode ierr; 1393 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1394 Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data; 1395 1396 PetscFunctionBegin; 1397 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1398 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1399 ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr); 1400 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1401 ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr); 1402 } else { 1403 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1404 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1405 } 1406 PetscFunctionReturn(0); 1407 } 1408 1409 #undef __FUNCT__ 1410 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ" 1411 PetscErrorCode MatSetUpPreallocation_MPISBAIJ(Mat A) 1412 { 1413 PetscErrorCode ierr; 1414 1415 PetscFunctionBegin; 1416 ierr = MatMPISBAIJSetPreallocation(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1417 PetscFunctionReturn(0); 1418 } 1419 1420 #include "petscblaslapack.h" 1421 #undef __FUNCT__ 1422 #define __FUNCT__ "MatAXPY_MPISBAIJ" 1423 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1424 { 1425 PetscErrorCode ierr; 1426 Mat_MPISBAIJ *xx=(Mat_MPISBAIJ *)X->data,*yy=(Mat_MPISBAIJ *)Y->data; 1427 PetscBLASInt bnz,one=1; 1428 Mat_SeqSBAIJ *xa,*ya; 1429 Mat_SeqBAIJ *xb,*yb; 1430 1431 PetscFunctionBegin; 1432 if (str == SAME_NONZERO_PATTERN) { 1433 PetscScalar alpha = a; 1434 xa = (Mat_SeqSBAIJ *)xx->A->data; 1435 ya = (Mat_SeqSBAIJ *)yy->A->data; 1436 bnz = (PetscBLASInt)xa->nz; 1437 BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one); 1438 xb = (Mat_SeqBAIJ *)xx->B->data; 1439 yb = (Mat_SeqBAIJ *)yy->B->data; 1440 bnz = (PetscBLASInt)xb->nz; 1441 BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one); 1442 } else { 1443 ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr); 1444 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1445 ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr); 1446 } 1447 PetscFunctionReturn(0); 1448 } 1449 1450 #undef __FUNCT__ 1451 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ" 1452 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[]) 1453 { 1454 PetscErrorCode ierr; 1455 PetscInt i; 1456 PetscTruth flg; 1457 1458 PetscFunctionBegin; 1459 for (i=0; i<n; i++) { 1460 ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr); 1461 if (!flg) { 1462 SETERRQ(PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices"); 1463 } 1464 } 1465 ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr); 1466 PetscFunctionReturn(0); 1467 } 1468 1469 1470 /* -------------------------------------------------------------------*/ 1471 static struct _MatOps MatOps_Values = { 1472 MatSetValues_MPISBAIJ, 1473 MatGetRow_MPISBAIJ, 1474 MatRestoreRow_MPISBAIJ, 1475 MatMult_MPISBAIJ, 1476 /* 4*/ MatMultAdd_MPISBAIJ, 1477 MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */ 1478 MatMultAdd_MPISBAIJ, 1479 0, 1480 0, 1481 0, 1482 /*10*/ 0, 1483 0, 1484 0, 1485 MatRelax_MPISBAIJ, 1486 MatTranspose_MPISBAIJ, 1487 /*15*/ MatGetInfo_MPISBAIJ, 1488 MatEqual_MPISBAIJ, 1489 MatGetDiagonal_MPISBAIJ, 1490 MatDiagonalScale_MPISBAIJ, 1491 MatNorm_MPISBAIJ, 1492 /*20*/ MatAssemblyBegin_MPISBAIJ, 1493 MatAssemblyEnd_MPISBAIJ, 1494 0, 1495 MatSetOption_MPISBAIJ, 1496 MatZeroEntries_MPISBAIJ, 1497 /*25*/ 0, 1498 0, 1499 0, 1500 0, 1501 0, 1502 /*30*/ MatSetUpPreallocation_MPISBAIJ, 1503 0, 1504 0, 1505 0, 1506 0, 1507 /*35*/ MatDuplicate_MPISBAIJ, 1508 0, 1509 0, 1510 0, 1511 0, 1512 /*40*/ MatAXPY_MPISBAIJ, 1513 MatGetSubMatrices_MPISBAIJ, 1514 MatIncreaseOverlap_MPISBAIJ, 1515 MatGetValues_MPISBAIJ, 1516 MatCopy_MPISBAIJ, 1517 /*45*/ 0, 1518 MatScale_MPISBAIJ, 1519 0, 1520 0, 1521 0, 1522 /*50*/ 0, 1523 0, 1524 0, 1525 0, 1526 0, 1527 /*55*/ 0, 1528 0, 1529 MatSetUnfactored_MPISBAIJ, 1530 0, 1531 MatSetValuesBlocked_MPISBAIJ, 1532 /*60*/ 0, 1533 0, 1534 0, 1535 0, 1536 0, 1537 /*65*/ 0, 1538 0, 1539 0, 1540 0, 1541 0, 1542 /*70*/ MatGetRowMaxAbs_MPISBAIJ, 1543 0, 1544 0, 1545 0, 1546 0, 1547 /*75*/ 0, 1548 0, 1549 0, 1550 0, 1551 0, 1552 /*80*/ 0, 1553 0, 1554 0, 1555 0, 1556 MatLoad_MPISBAIJ, 1557 /*85*/ 0, 1558 0, 1559 0, 1560 0, 1561 0, 1562 /*90*/ 0, 1563 0, 1564 0, 1565 0, 1566 0, 1567 /*95*/ 0, 1568 0, 1569 0, 1570 0, 1571 0, 1572 /*100*/0, 1573 0, 1574 0, 1575 0, 1576 0, 1577 /*105*/0, 1578 MatRealPart_MPISBAIJ, 1579 MatImaginaryPart_MPISBAIJ, 1580 MatGetRowUpperTriangular_MPISBAIJ, 1581 MatRestoreRowUpperTriangular_MPISBAIJ 1582 }; 1583 1584 1585 EXTERN_C_BEGIN 1586 #undef __FUNCT__ 1587 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ" 1588 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 1589 { 1590 PetscFunctionBegin; 1591 *a = ((Mat_MPISBAIJ *)A->data)->A; 1592 *iscopy = PETSC_FALSE; 1593 PetscFunctionReturn(0); 1594 } 1595 EXTERN_C_END 1596 1597 EXTERN_C_BEGIN 1598 #undef __FUNCT__ 1599 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ" 1600 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz) 1601 { 1602 Mat_MPISBAIJ *b; 1603 PetscErrorCode ierr; 1604 PetscInt i,mbs,Mbs; 1605 1606 PetscFunctionBegin; 1607 ierr = PetscOptionsBegin(B->comm,B->prefix,"Options for MPISBAIJ matrix","Mat");CHKERRQ(ierr); 1608 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 1609 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1610 1611 if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive"); 1612 if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3; 1613 if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1; 1614 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 1615 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 1616 1617 B->rmap.bs = B->cmap.bs = bs; 1618 ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr); 1619 ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr); 1620 1621 if (d_nnz) { 1622 for (i=0; i<B->rmap.n/bs; i++) { 1623 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 1624 } 1625 } 1626 if (o_nnz) { 1627 for (i=0; i<B->rmap.n/bs; i++) { 1628 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 1629 } 1630 } 1631 B->preallocated = PETSC_TRUE; 1632 1633 b = (Mat_MPISBAIJ*)B->data; 1634 mbs = B->rmap.n/bs; 1635 Mbs = B->rmap.N/bs; 1636 if (mbs*bs != B->rmap.n) { 1637 SETERRQ2(PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap.N,bs); 1638 } 1639 1640 B->rmap.bs = bs; 1641 b->bs2 = bs*bs; 1642 b->mbs = mbs; 1643 b->nbs = mbs; 1644 b->Mbs = Mbs; 1645 b->Nbs = Mbs; 1646 1647 for (i=0; i<=b->size; i++) { 1648 b->rangebs[i] = B->rmap.range[i]/bs; 1649 } 1650 b->rstartbs = B->rmap.rstart/bs; 1651 b->rendbs = B->rmap.rend/bs; 1652 1653 b->cstartbs = B->cmap.rstart/bs; 1654 b->cendbs = B->cmap.rend/bs; 1655 1656 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 1657 ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr); 1658 ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr); 1659 ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 1660 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 1661 1662 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 1663 ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr); 1664 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 1665 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 1666 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 1667 1668 /* build cache for off array entries formed */ 1669 ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr); 1670 1671 PetscFunctionReturn(0); 1672 } 1673 EXTERN_C_END 1674 1675 /*MC 1676 MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices, 1677 based on block compressed sparse row format. Only the upper triangular portion of the matrix is stored. 1678 1679 Options Database Keys: 1680 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions() 1681 1682 Level: beginner 1683 1684 .seealso: MatCreateMPISBAIJ 1685 M*/ 1686 1687 EXTERN_C_BEGIN 1688 #undef __FUNCT__ 1689 #define __FUNCT__ "MatCreate_MPISBAIJ" 1690 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPISBAIJ(Mat B) 1691 { 1692 Mat_MPISBAIJ *b; 1693 PetscErrorCode ierr; 1694 PetscTruth flg; 1695 1696 PetscFunctionBegin; 1697 1698 ierr = PetscNew(Mat_MPISBAIJ,&b);CHKERRQ(ierr); 1699 B->data = (void*)b; 1700 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1701 1702 B->ops->destroy = MatDestroy_MPISBAIJ; 1703 B->ops->view = MatView_MPISBAIJ; 1704 B->mapping = 0; 1705 B->factor = 0; 1706 B->assembled = PETSC_FALSE; 1707 1708 B->insertmode = NOT_SET_VALUES; 1709 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 1710 ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr); 1711 1712 /* build local table of row and column ownerships */ 1713 ierr = PetscMalloc((b->size+2)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr); 1714 1715 /* build cache for off array entries formed */ 1716 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 1717 b->donotstash = PETSC_FALSE; 1718 b->colmap = PETSC_NULL; 1719 b->garray = PETSC_NULL; 1720 b->roworiented = PETSC_TRUE; 1721 1722 #if defined(PETSC_USE_MAT_SINGLE) 1723 /* stuff for MatSetValues_XXX in single precision */ 1724 b->setvalueslen = 0; 1725 b->setvaluescopy = PETSC_NULL; 1726 #endif 1727 1728 /* stuff used in block assembly */ 1729 b->barray = 0; 1730 1731 /* stuff used for matrix vector multiply */ 1732 b->lvec = 0; 1733 b->Mvctx = 0; 1734 b->slvec0 = 0; 1735 b->slvec0b = 0; 1736 b->slvec1 = 0; 1737 b->slvec1a = 0; 1738 b->slvec1b = 0; 1739 b->sMvctx = 0; 1740 1741 /* stuff for MatGetRow() */ 1742 b->rowindices = 0; 1743 b->rowvalues = 0; 1744 b->getrowactive = PETSC_FALSE; 1745 1746 /* hash table stuff */ 1747 b->ht = 0; 1748 b->hd = 0; 1749 b->ht_size = 0; 1750 b->ht_flag = PETSC_FALSE; 1751 b->ht_fact = 0; 1752 b->ht_total_ct = 0; 1753 b->ht_insert_ct = 0; 1754 1755 b->in_loc = 0; 1756 b->v_loc = 0; 1757 b->n_loc = 0; 1758 ierr = PetscOptionsBegin(B->comm,PETSC_NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr); 1759 ierr = PetscOptionsTruth("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 1760 if (flg) { 1761 PetscReal fact = 1.39; 1762 ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr); 1763 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr); 1764 if (fact <= 1.0) fact = 1.39; 1765 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 1766 ierr = PetscInfo1(0,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 1767 } 1768 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1769 1770 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1771 "MatStoreValues_MPISBAIJ", 1772 MatStoreValues_MPISBAIJ);CHKERRQ(ierr); 1773 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1774 "MatRetrieveValues_MPISBAIJ", 1775 MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr); 1776 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 1777 "MatGetDiagonalBlock_MPISBAIJ", 1778 MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr); 1779 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C", 1780 "MatMPISBAIJSetPreallocation_MPISBAIJ", 1781 MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr); 1782 B->symmetric = PETSC_TRUE; 1783 B->structurally_symmetric = PETSC_TRUE; 1784 B->symmetric_set = PETSC_TRUE; 1785 B->structurally_symmetric_set = PETSC_TRUE; 1786 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr); 1787 PetscFunctionReturn(0); 1788 } 1789 EXTERN_C_END 1790 1791 /*MC 1792 MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices. 1793 1794 This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator, 1795 and MATMPISBAIJ otherwise. 1796 1797 Options Database Keys: 1798 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions() 1799 1800 Level: beginner 1801 1802 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ 1803 M*/ 1804 1805 EXTERN_C_BEGIN 1806 #undef __FUNCT__ 1807 #define __FUNCT__ "MatCreate_SBAIJ" 1808 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SBAIJ(Mat A) 1809 { 1810 PetscErrorCode ierr; 1811 PetscMPIInt size; 1812 1813 PetscFunctionBegin; 1814 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 1815 if (size == 1) { 1816 ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr); 1817 } else { 1818 ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 1819 } 1820 PetscFunctionReturn(0); 1821 } 1822 EXTERN_C_END 1823 1824 #undef __FUNCT__ 1825 #define __FUNCT__ "MatMPISBAIJSetPreallocation" 1826 /*@C 1827 MatMPISBAIJSetPreallocation - For good matrix assembly performance 1828 the user should preallocate the matrix storage by setting the parameters 1829 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1830 performance can be increased by more than a factor of 50. 1831 1832 Collective on Mat 1833 1834 Input Parameters: 1835 + A - the matrix 1836 . bs - size of blockk 1837 . d_nz - number of block nonzeros per block row in diagonal portion of local 1838 submatrix (same for all local rows) 1839 . d_nnz - array containing the number of block nonzeros in the various block rows 1840 in the upper triangular and diagonal part of the in diagonal portion of the local 1841 (possibly different for each block row) or PETSC_NULL. You must leave room 1842 for the diagonal entry even if it is zero. 1843 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1844 submatrix (same for all local rows). 1845 - o_nnz - array containing the number of nonzeros in the various block rows of the 1846 off-diagonal portion of the local submatrix (possibly different for 1847 each block row) or PETSC_NULL. 1848 1849 1850 Options Database Keys: 1851 . -mat_no_unroll - uses code that does not unroll the loops in the 1852 block calculations (much slower) 1853 . -mat_block_size - size of the blocks to use 1854 1855 Notes: 1856 1857 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1858 than it must be used on all processors that share the object for that argument. 1859 1860 If the *_nnz parameter is given then the *_nz parameter is ignored 1861 1862 Storage Information: 1863 For a square global matrix we define each processor's diagonal portion 1864 to be its local rows and the corresponding columns (a square submatrix); 1865 each processor's off-diagonal portion encompasses the remainder of the 1866 local matrix (a rectangular submatrix). 1867 1868 The user can specify preallocated storage for the diagonal part of 1869 the local submatrix with either d_nz or d_nnz (not both). Set 1870 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1871 memory allocation. Likewise, specify preallocated storage for the 1872 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1873 1874 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1875 the figure below we depict these three local rows and all columns (0-11). 1876 1877 .vb 1878 0 1 2 3 4 5 6 7 8 9 10 11 1879 ------------------- 1880 row 3 | o o o d d d o o o o o o 1881 row 4 | o o o d d d o o o o o o 1882 row 5 | o o o d d d o o o o o o 1883 ------------------- 1884 .ve 1885 1886 Thus, any entries in the d locations are stored in the d (diagonal) 1887 submatrix, and any entries in the o locations are stored in the 1888 o (off-diagonal) submatrix. Note that the d matrix is stored in 1889 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 1890 1891 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 1892 plus the diagonal part of the d matrix, 1893 and o_nz should indicate the number of block nonzeros per row in the o matrix. 1894 In general, for PDE problems in which most nonzeros are near the diagonal, 1895 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1896 or you will get TERRIBLE performance; see the users' manual chapter on 1897 matrices. 1898 1899 Level: intermediate 1900 1901 .keywords: matrix, block, aij, compressed row, sparse, parallel 1902 1903 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1904 @*/ 1905 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 1906 { 1907 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 1908 1909 PetscFunctionBegin; 1910 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 1911 if (f) { 1912 ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 1913 } 1914 PetscFunctionReturn(0); 1915 } 1916 1917 #undef __FUNCT__ 1918 #define __FUNCT__ "MatCreateMPISBAIJ" 1919 /*@C 1920 MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format 1921 (block compressed row). For good matrix assembly performance 1922 the user should preallocate the matrix storage by setting the parameters 1923 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1924 performance can be increased by more than a factor of 50. 1925 1926 Collective on MPI_Comm 1927 1928 Input Parameters: 1929 + comm - MPI communicator 1930 . bs - size of blockk 1931 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1932 This value should be the same as the local size used in creating the 1933 y vector for the matrix-vector product y = Ax. 1934 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1935 This value should be the same as the local size used in creating the 1936 x vector for the matrix-vector product y = Ax. 1937 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 1938 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 1939 . d_nz - number of block nonzeros per block row in diagonal portion of local 1940 submatrix (same for all local rows) 1941 . d_nnz - array containing the number of block nonzeros in the various block rows 1942 in the upper triangular portion of the in diagonal portion of the local 1943 (possibly different for each block block row) or PETSC_NULL. 1944 You must leave room for the diagonal entry even if it is zero. 1945 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1946 submatrix (same for all local rows). 1947 - o_nnz - array containing the number of nonzeros in the various block rows of the 1948 off-diagonal portion of the local submatrix (possibly different for 1949 each block row) or PETSC_NULL. 1950 1951 Output Parameter: 1952 . A - the matrix 1953 1954 Options Database Keys: 1955 . -mat_no_unroll - uses code that does not unroll the loops in the 1956 block calculations (much slower) 1957 . -mat_block_size - size of the blocks to use 1958 . -mat_mpi - use the parallel matrix data structures even on one processor 1959 (defaults to using SeqBAIJ format on one processor) 1960 1961 Notes: 1962 The number of rows and columns must be divisible by blocksize. 1963 1964 The user MUST specify either the local or global matrix dimensions 1965 (possibly both). 1966 1967 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1968 than it must be used on all processors that share the object for that argument. 1969 1970 If the *_nnz parameter is given then the *_nz parameter is ignored 1971 1972 Storage Information: 1973 For a square global matrix we define each processor's diagonal portion 1974 to be its local rows and the corresponding columns (a square submatrix); 1975 each processor's off-diagonal portion encompasses the remainder of the 1976 local matrix (a rectangular submatrix). 1977 1978 The user can specify preallocated storage for the diagonal part of 1979 the local submatrix with either d_nz or d_nnz (not both). Set 1980 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1981 memory allocation. Likewise, specify preallocated storage for the 1982 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1983 1984 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1985 the figure below we depict these three local rows and all columns (0-11). 1986 1987 .vb 1988 0 1 2 3 4 5 6 7 8 9 10 11 1989 ------------------- 1990 row 3 | o o o d d d o o o o o o 1991 row 4 | o o o d d d o o o o o o 1992 row 5 | o o o d d d o o o o o o 1993 ------------------- 1994 .ve 1995 1996 Thus, any entries in the d locations are stored in the d (diagonal) 1997 submatrix, and any entries in the o locations are stored in the 1998 o (off-diagonal) submatrix. Note that the d matrix is stored in 1999 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 2000 2001 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 2002 plus the diagonal part of the d matrix, 2003 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2004 In general, for PDE problems in which most nonzeros are near the diagonal, 2005 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2006 or you will get TERRIBLE performance; see the users' manual chapter on 2007 matrices. 2008 2009 Level: intermediate 2010 2011 .keywords: matrix, block, aij, compressed row, sparse, parallel 2012 2013 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2014 @*/ 2015 2016 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPISBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2017 { 2018 PetscErrorCode ierr; 2019 PetscMPIInt size; 2020 2021 PetscFunctionBegin; 2022 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2023 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2024 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2025 if (size > 1) { 2026 ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr); 2027 ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2028 } else { 2029 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 2030 ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2031 } 2032 PetscFunctionReturn(0); 2033 } 2034 2035 2036 #undef __FUNCT__ 2037 #define __FUNCT__ "MatDuplicate_MPISBAIJ" 2038 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2039 { 2040 Mat mat; 2041 Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data; 2042 PetscErrorCode ierr; 2043 PetscInt len=0,nt,bs=matin->rmap.bs,mbs=oldmat->mbs; 2044 PetscScalar *array; 2045 2046 PetscFunctionBegin; 2047 *newmat = 0; 2048 ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr); 2049 ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr); 2050 ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 2051 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2052 ierr = PetscMapCopy(matin->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr); 2053 ierr = PetscMapCopy(matin->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr); 2054 2055 mat->factor = matin->factor; 2056 mat->preallocated = PETSC_TRUE; 2057 mat->assembled = PETSC_TRUE; 2058 mat->insertmode = NOT_SET_VALUES; 2059 2060 a = (Mat_MPISBAIJ*)mat->data; 2061 a->bs2 = oldmat->bs2; 2062 a->mbs = oldmat->mbs; 2063 a->nbs = oldmat->nbs; 2064 a->Mbs = oldmat->Mbs; 2065 a->Nbs = oldmat->Nbs; 2066 2067 2068 a->size = oldmat->size; 2069 a->rank = oldmat->rank; 2070 a->donotstash = oldmat->donotstash; 2071 a->roworiented = oldmat->roworiented; 2072 a->rowindices = 0; 2073 a->rowvalues = 0; 2074 a->getrowactive = PETSC_FALSE; 2075 a->barray = 0; 2076 a->rstartbs = oldmat->rstartbs; 2077 a->rendbs = oldmat->rendbs; 2078 a->cstartbs = oldmat->cstartbs; 2079 a->cendbs = oldmat->cendbs; 2080 2081 /* hash table stuff */ 2082 a->ht = 0; 2083 a->hd = 0; 2084 a->ht_size = 0; 2085 a->ht_flag = oldmat->ht_flag; 2086 a->ht_fact = oldmat->ht_fact; 2087 a->ht_total_ct = 0; 2088 a->ht_insert_ct = 0; 2089 2090 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr); 2091 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 2092 ierr = MatStashCreate_Private(matin->comm,matin->rmap.bs,&mat->bstash);CHKERRQ(ierr); 2093 if (oldmat->colmap) { 2094 #if defined (PETSC_USE_CTABLE) 2095 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2096 #else 2097 ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2098 ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2099 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2100 #endif 2101 } else a->colmap = 0; 2102 2103 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2104 ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2105 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2106 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 2107 } else a->garray = 0; 2108 2109 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2110 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2111 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2112 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2113 2114 ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr); 2115 ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr); 2116 ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr); 2117 ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr); 2118 2119 ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr); 2120 ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr); 2121 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr); 2122 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr); 2123 ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr); 2124 ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr); 2125 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr); 2126 ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr); 2127 ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr); 2128 ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr); 2129 ierr = PetscLogObjectParent(mat,a->slvec0b);CHKERRQ(ierr); 2130 ierr = PetscLogObjectParent(mat,a->slvec1a);CHKERRQ(ierr); 2131 ierr = PetscLogObjectParent(mat,a->slvec1b);CHKERRQ(ierr); 2132 2133 /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */ 2134 ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr); 2135 a->sMvctx = oldmat->sMvctx; 2136 ierr = PetscLogObjectParent(mat,a->sMvctx);CHKERRQ(ierr); 2137 2138 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2139 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2140 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2141 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2142 ierr = PetscFListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr); 2143 *newmat = mat; 2144 PetscFunctionReturn(0); 2145 } 2146 2147 #include "petscsys.h" 2148 2149 #undef __FUNCT__ 2150 #define __FUNCT__ "MatLoad_MPISBAIJ" 2151 PetscErrorCode MatLoad_MPISBAIJ(PetscViewer viewer, MatType type,Mat *newmat) 2152 { 2153 Mat A; 2154 PetscErrorCode ierr; 2155 PetscInt i,nz,j,rstart,rend; 2156 PetscScalar *vals,*buf; 2157 MPI_Comm comm = ((PetscObject)viewer)->comm; 2158 MPI_Status status; 2159 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,*locrowlens; 2160 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2161 PetscInt *procsnz = 0,jj,*mycols,*ibuf; 2162 PetscInt bs=1,Mbs,mbs,extra_rows; 2163 PetscInt *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2164 PetscInt dcount,kmax,k,nzcount,tmp; 2165 int fd; 2166 2167 PetscFunctionBegin; 2168 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr); 2169 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 2170 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2171 2172 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2173 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2174 if (!rank) { 2175 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2176 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2177 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2178 if (header[3] < 0) { 2179 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ"); 2180 } 2181 } 2182 2183 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2184 M = header[1]; N = header[2]; 2185 2186 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2187 2188 /* 2189 This code adds extra rows to make sure the number of rows is 2190 divisible by the blocksize 2191 */ 2192 Mbs = M/bs; 2193 extra_rows = bs - M + bs*(Mbs); 2194 if (extra_rows == bs) extra_rows = 0; 2195 else Mbs++; 2196 if (extra_rows &&!rank) { 2197 ierr = PetscInfo(0,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 2198 } 2199 2200 /* determine ownership of all rows */ 2201 mbs = Mbs/size + ((Mbs % size) > rank); 2202 m = mbs*bs; 2203 ierr = PetscMalloc(2*(size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr); 2204 browners = rowners + size + 1; 2205 ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2206 rowners[0] = 0; 2207 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2208 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2209 rstart = rowners[rank]; 2210 rend = rowners[rank+1]; 2211 2212 /* distribute row lengths to all processors */ 2213 ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscMPIInt),&locrowlens);CHKERRQ(ierr); 2214 if (!rank) { 2215 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2216 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2217 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2218 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr); 2219 for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2220 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2221 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2222 } else { 2223 ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2224 } 2225 2226 if (!rank) { /* procs[0] */ 2227 /* calculate the number of nonzeros on each processor */ 2228 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2229 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2230 for (i=0; i<size; i++) { 2231 for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 2232 procsnz[i] += rowlengths[j]; 2233 } 2234 } 2235 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2236 2237 /* determine max buffer needed and allocate it */ 2238 maxnz = 0; 2239 for (i=0; i<size; i++) { 2240 maxnz = PetscMax(maxnz,procsnz[i]); 2241 } 2242 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2243 2244 /* read in my part of the matrix column indices */ 2245 nz = procsnz[0]; 2246 ierr = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 2247 mycols = ibuf; 2248 if (size == 1) nz -= extra_rows; 2249 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2250 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2251 2252 /* read in every ones (except the last) and ship off */ 2253 for (i=1; i<size-1; i++) { 2254 nz = procsnz[i]; 2255 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2256 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2257 } 2258 /* read in the stuff for the last proc */ 2259 if (size != 1) { 2260 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2261 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2262 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2263 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 2264 } 2265 ierr = PetscFree(cols);CHKERRQ(ierr); 2266 } else { /* procs[i], i>0 */ 2267 /* determine buffer space needed for message */ 2268 nz = 0; 2269 for (i=0; i<m; i++) { 2270 nz += locrowlens[i]; 2271 } 2272 ierr = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 2273 mycols = ibuf; 2274 /* receive message of column indices*/ 2275 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2276 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2277 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2278 } 2279 2280 /* loop over local rows, determining number of off diagonal entries */ 2281 ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 2282 odlens = dlens + (rend-rstart); 2283 ierr = PetscMalloc(3*Mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr); 2284 ierr = PetscMemzero(mask,3*Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2285 masked1 = mask + Mbs; 2286 masked2 = masked1 + Mbs; 2287 rowcount = 0; nzcount = 0; 2288 for (i=0; i<mbs; i++) { 2289 dcount = 0; 2290 odcount = 0; 2291 for (j=0; j<bs; j++) { 2292 kmax = locrowlens[rowcount]; 2293 for (k=0; k<kmax; k++) { 2294 tmp = mycols[nzcount++]/bs; /* block col. index */ 2295 if (!mask[tmp]) { 2296 mask[tmp] = 1; 2297 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */ 2298 else masked1[dcount++] = tmp; /* entry in diag portion */ 2299 } 2300 } 2301 rowcount++; 2302 } 2303 2304 dlens[i] = dcount; /* d_nzz[i] */ 2305 odlens[i] = odcount; /* o_nzz[i] */ 2306 2307 /* zero out the mask elements we set */ 2308 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2309 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2310 } 2311 2312 /* create our matrix */ 2313 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 2314 ierr = MatSetSizes(A,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2315 ierr = MatSetType(A,type);CHKERRQ(ierr); 2316 ierr = MatMPISBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr); 2317 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2318 2319 if (!rank) { 2320 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2321 /* read in my part of the matrix numerical values */ 2322 nz = procsnz[0]; 2323 vals = buf; 2324 mycols = ibuf; 2325 if (size == 1) nz -= extra_rows; 2326 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2327 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2328 2329 /* insert into matrix */ 2330 jj = rstart*bs; 2331 for (i=0; i<m; i++) { 2332 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2333 mycols += locrowlens[i]; 2334 vals += locrowlens[i]; 2335 jj++; 2336 } 2337 2338 /* read in other processors (except the last one) and ship out */ 2339 for (i=1; i<size-1; i++) { 2340 nz = procsnz[i]; 2341 vals = buf; 2342 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2343 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2344 } 2345 /* the last proc */ 2346 if (size != 1){ 2347 nz = procsnz[i] - extra_rows; 2348 vals = buf; 2349 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2350 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2351 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 2352 } 2353 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2354 2355 } else { 2356 /* receive numeric values */ 2357 ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2358 2359 /* receive message of values*/ 2360 vals = buf; 2361 mycols = ibuf; 2362 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2363 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2364 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2365 2366 /* insert into matrix */ 2367 jj = rstart*bs; 2368 for (i=0; i<m; i++) { 2369 ierr = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2370 mycols += locrowlens[i]; 2371 vals += locrowlens[i]; 2372 jj++; 2373 } 2374 } 2375 2376 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2377 ierr = PetscFree(buf);CHKERRQ(ierr); 2378 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2379 ierr = PetscFree(rowners);CHKERRQ(ierr); 2380 ierr = PetscFree(dlens);CHKERRQ(ierr); 2381 ierr = PetscFree(mask);CHKERRQ(ierr); 2382 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2383 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2384 *newmat = A; 2385 PetscFunctionReturn(0); 2386 } 2387 2388 #undef __FUNCT__ 2389 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor" 2390 /*XXXXX@ 2391 MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2392 2393 Input Parameters: 2394 . mat - the matrix 2395 . fact - factor 2396 2397 Collective on Mat 2398 2399 Level: advanced 2400 2401 Notes: 2402 This can also be set by the command line option: -mat_use_hash_table fact 2403 2404 .keywords: matrix, hashtable, factor, HT 2405 2406 .seealso: MatSetOption() 2407 @XXXXX*/ 2408 2409 2410 #undef __FUNCT__ 2411 #define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ" 2412 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[]) 2413 { 2414 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 2415 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(a->B)->data; 2416 PetscReal atmp; 2417 PetscReal *work,*svalues,*rvalues; 2418 PetscErrorCode ierr; 2419 PetscInt i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol; 2420 PetscMPIInt rank,size; 2421 PetscInt *rowners_bs,dest,count,source; 2422 PetscScalar *va; 2423 MatScalar *ba; 2424 MPI_Status stat; 2425 2426 PetscFunctionBegin; 2427 if (idx) SETERRQ(PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov"); 2428 ierr = MatGetRowMaxAbs(a->A,v,PETSC_NULL);CHKERRQ(ierr); 2429 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2430 2431 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 2432 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 2433 2434 bs = A->rmap.bs; 2435 mbs = a->mbs; 2436 Mbs = a->Mbs; 2437 ba = b->a; 2438 bi = b->i; 2439 bj = b->j; 2440 2441 /* find ownerships */ 2442 rowners_bs = A->rmap.range; 2443 2444 /* each proc creates an array to be distributed */ 2445 ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr); 2446 ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr); 2447 2448 /* row_max for B */ 2449 if (rank != size-1){ 2450 for (i=0; i<mbs; i++) { 2451 ncols = bi[1] - bi[0]; bi++; 2452 brow = bs*i; 2453 for (j=0; j<ncols; j++){ 2454 bcol = bs*(*bj); 2455 for (kcol=0; kcol<bs; kcol++){ 2456 col = bcol + kcol; /* local col index */ 2457 col += rowners_bs[rank+1]; /* global col index */ 2458 for (krow=0; krow<bs; krow++){ 2459 atmp = PetscAbsScalar(*ba); ba++; 2460 row = brow + krow; /* local row index */ 2461 if (PetscRealPart(va[row]) < atmp) va[row] = atmp; 2462 if (work[col] < atmp) work[col] = atmp; 2463 } 2464 } 2465 bj++; 2466 } 2467 } 2468 2469 /* send values to its owners */ 2470 for (dest=rank+1; dest<size; dest++){ 2471 svalues = work + rowners_bs[dest]; 2472 count = rowners_bs[dest+1]-rowners_bs[dest]; 2473 ierr = MPI_Send(svalues,count,MPIU_REAL,dest,rank,A->comm);CHKERRQ(ierr); 2474 } 2475 } 2476 2477 /* receive values */ 2478 if (rank){ 2479 rvalues = work; 2480 count = rowners_bs[rank+1]-rowners_bs[rank]; 2481 for (source=0; source<rank; source++){ 2482 ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,A->comm,&stat);CHKERRQ(ierr); 2483 /* process values */ 2484 for (i=0; i<count; i++){ 2485 if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i]; 2486 } 2487 } 2488 } 2489 2490 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2491 ierr = PetscFree(work);CHKERRQ(ierr); 2492 PetscFunctionReturn(0); 2493 } 2494 2495 #undef __FUNCT__ 2496 #define __FUNCT__ "MatRelax_MPISBAIJ" 2497 PetscErrorCode MatRelax_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2498 { 2499 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2500 PetscErrorCode ierr; 2501 PetscInt mbs=mat->mbs,bs=matin->rmap.bs; 2502 PetscScalar *x,*b,*ptr,zero=0.0; 2503 Vec bb1; 2504 2505 PetscFunctionBegin; 2506 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 2507 if (bs > 1) 2508 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2509 2510 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2511 if ( flag & SOR_ZERO_INITIAL_GUESS ) { 2512 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2513 its--; 2514 } 2515 2516 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2517 while (its--){ 2518 2519 /* lower triangular part: slvec0b = - B^T*xx */ 2520 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2521 2522 /* copy xx into slvec0a */ 2523 ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2524 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2525 ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2526 ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2527 2528 ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr); 2529 2530 /* copy bb into slvec1a */ 2531 ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2532 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 2533 ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2534 ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2535 2536 /* set slvec1b = 0 */ 2537 ierr = VecSet(mat->slvec1b,zero);CHKERRQ(ierr); 2538 2539 ierr = VecScatterBegin(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr); 2540 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2541 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 2542 ierr = VecScatterEnd(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr); 2543 2544 /* upper triangular part: bb1 = bb1 - B*x */ 2545 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr); 2546 2547 /* local diagonal sweep */ 2548 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2549 } 2550 ierr = VecDestroy(bb1);CHKERRQ(ierr); 2551 } else { 2552 SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2553 } 2554 PetscFunctionReturn(0); 2555 } 2556 2557 #undef __FUNCT__ 2558 #define __FUNCT__ "MatRelax_MPISBAIJ_2comm" 2559 PetscErrorCode MatRelax_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2560 { 2561 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2562 PetscErrorCode ierr; 2563 Vec lvec1,bb1; 2564 2565 PetscFunctionBegin; 2566 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 2567 if (matin->rmap.bs > 1) 2568 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2569 2570 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2571 if ( flag & SOR_ZERO_INITIAL_GUESS ) { 2572 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2573 its--; 2574 } 2575 2576 ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr); 2577 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2578 while (its--){ 2579 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 2580 2581 /* lower diagonal part: bb1 = bb - B^T*xx */ 2582 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr); 2583 ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr); 2584 2585 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 2586 ierr = VecCopy(bb,bb1);CHKERRQ(ierr); 2587 ierr = VecScatterBegin(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr); 2588 2589 /* upper diagonal part: bb1 = bb1 - B*x */ 2590 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2591 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 2592 2593 ierr = VecScatterEnd(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr); 2594 2595 /* diagonal sweep */ 2596 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2597 } 2598 ierr = VecDestroy(lvec1);CHKERRQ(ierr); 2599 ierr = VecDestroy(bb1);CHKERRQ(ierr); 2600 } else { 2601 SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2602 } 2603 PetscFunctionReturn(0); 2604 } 2605 2606