#define PETSCMAT_DLL

#include "src/mat/impls/baij/mpi/mpibaij.h"   /*I  "petscmat.h"  I*/
#include "mpisbaij.h"
#include "src/mat/impls/sbaij/seq/sbaij.h"

EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat);
EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat);
EXTERN PetscErrorCode DisAssemble_MPISBAIJ(Mat);
EXTERN PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt);
EXTERN PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar[]);
EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar[]);
EXTERN PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
EXTERN PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
EXTERN PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*);
EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar*);
EXTERN PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat,Vec,PetscInt[]);
EXTERN PetscErrorCode MatRelax_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec);

/* UGLY, ugly, ugly
   When MatScalar == PetscScalar the function MatSetValuesBlocked_MPISBAIJ_MatScalar() does
   not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and
   inserts them into the single precision data structure. The function MatSetValuesBlocked_MPISBAIJ()
   converts the entries into single precision and then calls ..._MatScalar() to put them
   into the single precision data structures.
*/
#if defined(PETSC_USE_MAT_SINGLE)
EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
EXTERN PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
EXTERN PetscErrorCode MatSetValues_MPISBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
#else
#define MatSetValuesBlocked_SeqSBAIJ_MatScalar    MatSetValuesBlocked_SeqSBAIJ
#define MatSetValues_MPISBAIJ_MatScalar           MatSetValues_MPISBAIJ
#define MatSetValuesBlocked_MPISBAIJ_MatScalar    MatSetValuesBlocked_MPISBAIJ
#define MatSetValues_MPISBAIJ_HT_MatScalar        MatSetValues_MPISBAIJ_HT
#define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar MatSetValuesBlocked_MPISBAIJ_HT
#endif

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPISBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#define CHUNKSIZE 10

#define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
{ \
  brow = row/bs;  \
  rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
  rmax = aimax[brow]; nrow = ailen[brow]; \
  bcol = col/bs; \
  ridx = row % bs; cidx = col % bs; \
  low  = 0; high = nrow; \
  while (high-low > 3) { \
    t = (low+high)/2; \
    if (rp[t] > bcol) high = t; \
    else              low  = t; \
  } \
  for (_i=low; _i<high; _i++) { \
    if (rp[_i] > bcol) break; \
    if (rp[_i] == bcol) { \
      bap = ap + bs2*_i + bs*cidx + ridx; \
      if (addv == ADD_VALUES) *bap += value; \
      else                    *bap  = value; \
      goto a_noinsert; \
    } \
  } \
  if (a->nonew == 1)  goto a_noinsert; \
  if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
  MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
  N = nrow++ - 1; \
  /* shift up all the later entries in this row */ \
  for (ii=N; ii>=_i; ii--) { \
    rp[ii+1] = rp[ii]; \
    ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
  } \
  if (N >= _i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \
  rp[_i]                      = bcol; \
  ap[bs2*_i + bs*cidx + ridx] = value; \
  a_noinsert:; \
  ailen[brow] = nrow; \
}
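/*
   Illustrative sketch, not part of the original source: the A_Private macro
   above locates a block column in a sorted row with a coarse binary search
   plus a short linear scan, then shifts later blocks up to open a slot.  The
   helper below shows the same control flow in function form; the name
   DemoInsertBlockCol and the plain cols/blocks arrays are hypothetical.
*/
static PetscErrorCode DemoInsertBlockCol(PetscInt *cols,MatScalar *blocks,PetscInt bs2,PetscInt *nrow,PetscInt bcol,PetscInt *loc)
{
  PetscErrorCode ierr;
  PetscInt       low = 0,high = *nrow,t,i,ii;

  PetscFunctionBegin;
  while (high-low > 3) {            /* coarse binary search on the sorted block columns */
    t = (low+high)/2;
    if (cols[t] > bcol) high = t;
    else                low  = t;
  }
  for (i=low; i<high; i++) {        /* finish with a short linear scan */
    if (cols[i] == bcol) {*loc = i; PetscFunctionReturn(0);} /* block already present */
    if (cols[i] > bcol) break;
  }
  for (ii=*nrow-1; ii>=i; ii--) {   /* shift later blocks up to open a slot */
    cols[ii+1] = cols[ii];
    ierr = PetscMemmove(blocks+bs2*(ii+1),blocks+bs2*ii,bs2*sizeof(MatScalar));CHKERRQ(ierr);
  }
  cols[i] = bcol;                   /* new block starts zeroed */
  ierr    = PetscMemzero(blocks+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
  (*nrow)++; *loc = i;
  PetscFunctionReturn(0);
}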
#ifndef MatSetValues_SeqBAIJ_B_Private
#define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
{ \
  brow = row/bs;  \
  rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
  rmax = bimax[brow]; nrow = bilen[brow]; \
  bcol = col/bs; \
  ridx = row % bs; cidx = col % bs; \
  low  = 0; high = nrow; \
  while (high-low > 3) { \
    t = (low+high)/2; \
    if (rp[t] > bcol) high = t; \
    else              low  = t; \
  } \
  for (_i=low; _i<high; _i++) { \
    if (rp[_i] > bcol) break; \
    if (rp[_i] == bcol) { \
      bap = ap + bs2*_i + bs*cidx + ridx; \
      if (addv == ADD_VALUES) *bap += value; \
      else                    *bap  = value; \
      goto b_noinsert; \
    } \
  } \
  if (b->nonew == 1)  goto b_noinsert; \
  if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
  MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
  N = nrow++ - 1; \
  /* shift up all the later entries in this row */ \
  for (ii=N; ii>=_i; ii--) { \
    rp[ii+1] = rp[ii]; \
    ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
  } \
  if (N >= _i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \
  rp[_i]                      = bcol; \
  ap[bs2*_i + bs*cidx + ridx] = value; \
  b_noinsert:; \
  bilen[brow] = nrow; \
}
#endif

#if defined(PETSC_USE_MAT_SINGLE)
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPISBAIJ"
PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,N = m*n;
  MatScalar      *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen = N;
  }
  vsingle = b->setvaluescopy;

  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,N = m*n*b->bs2;
  MatScalar      *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen = N;
  }
  vsingle = b->setvaluescopy;
  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif
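/*
   Illustrative sketch, not part of the original source: a minimal caller of
   MatSetValues() for this format.  The routine below keeps only entries whose
   block row is <= the block column, so a caller normally generates just the
   upper triangle; i > j input is dropped (or raises an error) as described in
   the comment that follows.  DemoFillUpper and its tridiagonal demo values
   are hypothetical.
*/
static PetscErrorCode DemoFillUpper(Mat mat,PetscInt rstart,PetscInt rend,PetscInt N)
{
  PetscErrorCode ierr;
  PetscInt       i,j;
  PetscScalar    v;

  PetscFunctionBegin;
  for (i=rstart; i<rend; i++) {
    v    = 4.0;                                            /* diagonal entry */
    ierr = MatSetValues(mat,1,&i,1,&i,&v,INSERT_VALUES);CHKERRQ(ierr);
    if (i+1 < N) {                                         /* first superdiagonal */
      j    = i+1;
      v    = -1.0;
      ierr = MatSetValues(mat,1,&i,1,&j,&v,INSERT_VALUES);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}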
206 */ 207 #undef __FUNCT__ 208 #define __FUNCT__ "MatSetValues_MPISBAIJ_MatScalar" 209 PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 210 { 211 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 212 MatScalar value; 213 PetscTruth roworiented = baij->roworiented; 214 PetscErrorCode ierr; 215 PetscInt i,j,row,col; 216 PetscInt rstart_orig=mat->rmap.rstart; 217 PetscInt rend_orig=mat->rmap.rend,cstart_orig=mat->cmap.rstart; 218 PetscInt cend_orig=mat->cmap.rend,bs=mat->rmap.bs; 219 220 /* Some Variables required in the macro */ 221 Mat A = baij->A; 222 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data; 223 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 224 MatScalar *aa=a->a; 225 226 Mat B = baij->B; 227 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 228 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 229 MatScalar *ba=b->a; 230 231 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 232 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 233 MatScalar *ap,*bap; 234 235 /* for stash */ 236 PetscInt n_loc, *in_loc = PETSC_NULL; 237 MatScalar *v_loc = PETSC_NULL; 238 239 PetscFunctionBegin; 240 241 if (!baij->donotstash){ 242 if (n > baij->n_loc) { 243 ierr = PetscFree(baij->in_loc);CHKERRQ(ierr); 244 ierr = PetscFree(baij->v_loc);CHKERRQ(ierr); 245 ierr = PetscMalloc(n*sizeof(PetscInt),&baij->in_loc);CHKERRQ(ierr); 246 ierr = PetscMalloc(n*sizeof(MatScalar),&baij->v_loc);CHKERRQ(ierr); 247 baij->n_loc = n; 248 } 249 in_loc = baij->in_loc; 250 v_loc = baij->v_loc; 251 } 252 253 for (i=0; i<m; i++) { 254 if (im[i] < 0) continue; 255 #if defined(PETSC_USE_DEBUG) 256 if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1); 257 #endif 258 if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */ 259 row = im[i] - rstart_orig; /* local row index */ 260 for (j=0; j<n; j++) { 261 if (im[i]/bs > in[j]/bs){ 262 if (a->ignore_ltriangular){ 263 continue; /* ignore lower triangular blocks */ 264 } else { 265 SETERRQ(PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. 
        if (im[i]/bs > in[j]/bs) {
          if (a->ignore_ltriangular) {
            continue; /* ignore lower triangular blocks */
          } else {
            SETERRQ(PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR)");
          }
        }
        if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diagonal entry (A) */
          col  = in[j] - cstart_orig;                    /* local col index */
          brow = row/bs; bcol = col/bs;
          if (brow > bcol) continue; /* ignore lower triangular blocks of A */
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
#endif
        else { /* off-diagonal entry (B) */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
              B     = baij->B;
              b     = (Mat_SeqBAIJ*)(B)->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              ba    = b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else { /* off-processor entry */
      if (!baij->donotstash) {
        n_loc = 0;
        for (j=0; j<n; j++) {
          if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
          in_loc[n_loc] = in[j];
          if (roworiented) {
            v_loc[n_loc] = v[i*n+j];
          } else {
            v_loc[n_loc] = v[j*m+i];
          }
          n_loc++;
        }
        ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_MatScalar"
PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray=baij->barray;
  PetscTruth      roworiented = baij->roworiented;
  PetscErrorCode  ierr;
  PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt        rend=baij->rendbs,cstart=baij->rstartbs,stepval;
  PetscInt        cend=baij->rendbs,bs=mat->rmap.bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++ = *value++;
            }
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_DEBUG)
#if defined (PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            }
          } else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
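/*
   Illustrative sketch, not part of the original source: inserting a single
   bs x bs block through MatSetValuesBlocked(), which ends up in the routine
   above.  Indices are block indices, and as with the point routine only
   blocks with block row <= block column are kept.  With the default
   row-oriented option the block is supplied by rows, matching the copy loop
   above.  DemoSetBlock and its demo values are hypothetical.
*/
static PetscErrorCode DemoSetBlock(Mat mat,PetscInt bi,PetscInt bj,PetscInt bs)
{
  PetscErrorCode ierr;
  PetscInt       r,c;
  PetscScalar    *block;

  PetscFunctionBegin;
  ierr = PetscMalloc(bs*bs*sizeof(PetscScalar),&block);CHKERRQ(ierr);
  for (r=0; r<bs; r++) {
    for (c=0; c<bs; c++) block[r*bs+c] = (r == c) ? 2.0 : 0.5; /* row-major demo block */
  }
  ierr = MatSetValuesBlocked(mat,1,&bi,1,&bj,block,INSERT_VALUES);CHKERRQ(ierr);
  ierr = PetscFree(block);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}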
#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPISBAIJ"
PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs=mat->rmap.bs,i,j,bsrstart = mat->rmap.rstart,bsrend = mat->rmap.rend;
  PetscInt       bscstart = mat->cmap.rstart,bscend = mat->cmap.rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
    if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]);
        if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          col  = idxn[j] - bscstart;
          ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPISBAIJ"
PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscReal      sum[2],*lnorm2;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
      ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of norm(A) */
      ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;            /* square of norm(B) */
      ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = sqrt(sum[0] + 2*sum[1]);
      ierr  = PetscFree(lnorm2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
      Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
      Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
      PetscReal    *rsum,*rsum2,vabs;
      PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
      PetscInt     brow,bcol,col,bs=baij->A->rmap.bs,row,grow,gcol,mbs=amat->mbs;
      MatScalar    *v;

      ierr  = PetscMalloc((2*mat->cmap.N+1)*sizeof(PetscReal),&rsum);CHKERRQ(ierr);
      rsum2 = rsum + mat->cmap.N;
      ierr  = PetscMemzero(rsum,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr);
      /* Amat */
      v = amat->a; jj = amat->j;
      for (brow=0; brow<mbs; brow++) {
        grow = bs*(rstart + brow);
        nz   = amat->i[brow+1] - amat->i[brow];
        for (bcol=0; bcol<nz; bcol++) {
          gcol = bs*(rstart + *jj); jj++;
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              vabs = PetscAbsScalar(*v); v++;
              rsum[gcol+col] += vabs;
              /* non-diagonal block */
              if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
            }
          }
        }
      }
      /* Bmat */
      v = bmat->a; jj = bmat->j;
      for (brow=0; brow<mbs; brow++) {
        grow = bs*(rstart + brow);
        nz   = bmat->i[brow+1] - bmat->i[brow];
        for (bcol=0; bcol<nz; bcol++) {
          gcol = bs*garray[*jj]; jj++;
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              vabs = PetscAbsScalar(*v); v++;
              rsum[gcol+col] += vabs;
              rsum[grow+row] += vabs;
            }
          }
        }
      }
      ierr  = MPI_Allreduce(rsum,rsum2,mat->cmap.N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = 0.0;
      for (col=0; col<mat->cmap.N; col++) {
        if (rsum2[col] > *norm) *norm = rsum2[col];
      }
      ierr = PetscFree(rsum);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
    }
  }
  PetscFunctionReturn(0);
}
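/*
   Note on the Frobenius branch above (an explanatory comment, not original
   source): sum[0] is the squared norm of the symmetric diagonal block A and
   sum[1] that of the stored off-diagonal part B.  Each stored entry of B at
   global position (i,j) also represents the unstored symmetric entry (j,i),
   so its contribution is counted twice, assuming the SeqSBAIJ MatNorm()
   already accounts for the lower triangle it does not store:

       ||M||_F = sqrt( sum_over_processes( ||A||_F^2 + 2*||B||_F^2 ) )
*/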
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (baij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,mat->rmap.range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(0,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(0,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
  Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col,other_disassembled;
  PetscMPIInt    n;
  PetscTruth     r1,r2,r3;
  MatScalar      *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!baij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* assemble all these values with a single function call */
        ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column-oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
  }
  ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
  baij->rowvalues = 0;

  PetscFunctionReturn(0);
}

extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
#undef __FUNCT__
#define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscInt          bs = mat->rmap.bs;
  PetscMPIInt       size = baij->size,rank = baij->rank;
  PetscTruth        iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
             rank,mat->rmap.N,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
             mat->rmap.bs,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto the first processor */
    Mat          A;
    Mat_SeqSBAIJ *Aloc;
    Mat_SeqBAIJ  *Bloc;
    PetscInt     M = mat->rmap.N,N = mat->cmap.N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar    *a;

    /* Should this be the same type as mat? */
    ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(A,mat->rmap.bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqSBAIJ*)baij->A->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Bloc = (Mat_SeqBAIJ*)baij->B->data;
    ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPISBAIJ"
PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (iascii || isdraw || issocket || isbinary) {
    ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPISBAIJ"
PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap.N,mat->cmap.N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = MatDestroy(baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(baij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  if (baij->lvec)  {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
  if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
  if (baij->slvec0) {
    ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr);
    ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr);
  }
  if (baij->slvec1) {
    ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr);
    ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr);
    ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr);
  }
  if (baij->sMvctx) {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);}
  ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree(baij->hd);CHKERRQ(ierr);
#if defined(PETSC_USE_MAT_SINGLE)
  ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
  ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(baij);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ"
PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt,mbs=a->mbs,bs=A->rmap.bs;
  PetscScalar    *x,*from,zero=0.0;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap.n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  }

  /* diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
  CHKMEMQ;
  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
  CHKMEMQ;
  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  CHKMEMQ;
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);

  CHKMEMQ;
  ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
  CHKMEMQ;
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
  CHKMEMQ;
  ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
  CHKMEMQ;
  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
  CHKMEMQ;
  PetscFunctionReturn(0);
}
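/*
   Note on MatMult above (an explanatory comment, not original source): with
   only the upper triangle stored, y = M*x is built in three pieces.  slvec1a
   starts as the diagonal-block product A*x; slvec0 holds [x ; B^T*x]; the
   slvec0 -> slvec1 scatter with ADD_VALUES simultaneously ships the B^T*x
   ("subdiagonal") contributions to their owning processes and gathers the
   ghost values of x into slvec1b, so the final multadd computes
   y = B*slvec1b + slvec1a.
*/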
#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ_2comm"
PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap.n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  }
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->rmap.n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  }

  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  /* do diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  /* do superdiagonal part */
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  /* do subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPISBAIJ"
PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       mbs=a->mbs,bs=A->rmap.bs;
  PetscScalar    *x,*from,zero=0.0;

  PetscFunctionBegin;
  /*
  PetscSynchronizedPrintf(A->comm," MatMultAdd is called ...\n");
  PetscSynchronizedFlush(A->comm);
  */
  /* diagonal part */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);

  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  /* do diagonal part */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* do superdiagonal part */
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);

  /* do subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
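/*
   Note (an explanatory comment, not original source): the _2comm variants
   above implement the same symmetric product with the conventional BAIJ
   scatter (Mvctx) instead of the combined sMvctx scheme.  A forward scatter
   of x into lvec feeds the B*x_ghost ("superdiagonal") term, and the B^T*x
   ("subdiagonal") term is returned with a reverse scatter that ADDs each
   contribution into the owner's part of the result vector.
*/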
/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* if (a->rmap.N != a->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPISBAIJ"
PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPISBAIJ"
PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       bs = matin->rmap.bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap.rstart,brend = matin->rmap.rend;
  PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;

  PetscFunctionBegin;
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /* allocate enough space to hold information from the longest row */
    Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
    Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt     max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc(max*bs2*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (PetscInt*)(mat->rowvalues + max*bs2);
  }

  if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows");
  lrow = row - brstart; /* local row index */

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B are already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPISBAIJ"
PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;

  PetscFunctionBegin;
  if (!baij->getrowactive) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
  }
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ"
PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;

  PetscFunctionBegin;
  aA->getrow_utriangular = PETSC_TRUE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ"
PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;

  PetscFunctionBegin;
  aA->getrow_utriangular = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRealPart_MPISBAIJ"
PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  ierr = MatRealPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatImaginaryPart_MPISBAIJ"
PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPISBAIJ"
PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPISBAIJ"
PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
  Mat            A = a->A,B = a->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = (PetscReal)matin->rmap.bs;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else {
    SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
  }
  info->rows_global       = (PetscReal)A->rmap.N;
  info->columns_global    = (PetscReal)A->cmap.N;
  info->rows_local        = (PetscReal)A->rmap.N;
  info->columns_local     = (PetscReal)A->cmap.N;
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPISBAIJ"
PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op)
{
  Mat_MPISBAIJ   *a  = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NO_NEW_NONZERO_LOCATIONS:
  case MAT_YES_NEW_NONZERO_LOCATIONS:
  case MAT_COLUMNS_UNSORTED:
  case MAT_COLUMNS_SORTED:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_KEEP_ZEROED_ROWS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROWS_SORTED:
  case MAT_ROWS_UNSORTED:
  case MAT_YES_NEW_DIAGONALS:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_COLUMN_ORIENTED:
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_NO_NEW_DIAGONALS:
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
  case MAT_USE_HASH_TABLE:
    a->ht_flag = PETSC_TRUE;
    break;
  case MAT_NOT_SYMMETRIC:
  case MAT_NOT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
    SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_NOT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_NOT_SYMMETRY_ETERNAL:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_LOWER_TRIANGULAR:
    aA->ignore_ltriangular = PETSC_TRUE;
    break;
  case MAT_ERROR_LOWER_TRIANGULAR:
    aA->ignore_ltriangular = PETSC_FALSE;
    break;
  case MAT_GETROW_UPPERTRIANGULAR:
    aA->getrow_utriangular = PETSC_TRUE;
    break;
  default:
    SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}
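/*
   Illustrative sketch, not part of the original source: typical option calls
   for this format, matching the cases handled above.  In particular
   MAT_IGNORE_LOWER_TRIANGULAR makes MatSetValues() silently drop i > j input
   instead of raising PETSC_ERR_USER.  DemoSetOptions is a hypothetical name.
*/
static PetscErrorCode DemoSetOptions(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetOption(A,MAT_IGNORE_LOWER_TRIANGULAR);CHKERRQ(ierr); /* drop i>j input */
  ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES);CHKERRQ(ierr); /* do not stash off-process values */
  PetscFunctionReturn(0);
}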
#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPISBAIJ"
PetscErrorCode MatTranspose_MPISBAIJ(Mat A,Mat *B)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  Mat            a=baij->A, b=baij->B;
  PetscErrorCode ierr;
  PetscInt       nv,m,n;
  PetscTruth     flg;

  PetscFunctionBegin;
  if (ll != rr) {
    ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
    if (!flg) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
  }
  if (!ll) PetscFunctionReturn(0);

  ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
  if (m != n) SETERRQ2(PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n);

  ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
  if (nv != n) SETERRQ(PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");

  ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);

  /* left diagonalscale the off-diagonal part */
  ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);

  /* scale the diagonal part */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  /* right diagonalscale the off-diagonal part */
  ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
  ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*);

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPISBAIJ"
PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag)
{
  Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
  Mat            a,b,c,d;
  PetscTruth     flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCopy_MPISBAIJ"
PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)B->data;

  PetscFunctionBegin;
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
    ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
    ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
  } else {
    ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
    ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ"
PetscErrorCode MatSetUpPreallocation_MPISBAIJ(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPISBAIJSetPreallocation(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#include "petscblaslapack.h"
#undef __FUNCT__
#define __FUNCT__ "MatAXPY_MPISBAIJ"
PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data;
  PetscBLASInt   bnz,one=1;
  Mat_SeqSBAIJ   *xa,*ya;
  Mat_SeqBAIJ    *xb,*yb;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    xa  = (Mat_SeqSBAIJ*)xx->A->data;
    ya  = (Mat_SeqSBAIJ*)yy->A->data;
    bnz = (PetscBLASInt)xa->nz;
    BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one);
    xb  = (Mat_SeqBAIJ*)xx->B->data;
    yb  = (Mat_SeqBAIJ*)yy->B->data;
    bnz = (PetscBLASInt)xb->nz;
    BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one);
  } else {
    ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
    ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
    ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
{
  PetscErrorCode ierr;
  PetscInt       i;
  PetscTruth     flg;

  PetscFunctionBegin;
  for (i=0; i<n; i++) {
    ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
    if (!flg) {
      SETERRQ(PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices");
    }
  }
  ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = {
       MatSetValues_MPISBAIJ,
       MatGetRow_MPISBAIJ,
       MatRestoreRow_MPISBAIJ,
       MatMult_MPISBAIJ,
/* 4*/ MatMultAdd_MPISBAIJ,
       MatMult_MPISBAIJ,       /* transpose versions are same as non-transpose */
       MatMultAdd_MPISBAIJ,
       0,
       0,
       0,
/*10*/ 0,
       0,
       0,
       MatRelax_MPISBAIJ,
       MatTranspose_MPISBAIJ,
/*15*/ MatGetInfo_MPISBAIJ,
       MatEqual_MPISBAIJ,
       MatGetDiagonal_MPISBAIJ,
       MatDiagonalScale_MPISBAIJ,
       MatNorm_MPISBAIJ,
/*20*/ MatAssemblyBegin_MPISBAIJ,
       MatAssemblyEnd_MPISBAIJ,
       0,
       MatSetOption_MPISBAIJ,
       MatZeroEntries_MPISBAIJ,
/*25*/ 0,
       0,
       0,
       0,
       0,
/*30*/ MatSetUpPreallocation_MPISBAIJ,
       0,
       0,
       0,
       0,
/*35*/ MatDuplicate_MPISBAIJ,
       0,
       0,
       0,
       0,
/*40*/ MatAXPY_MPISBAIJ,
       MatGetSubMatrices_MPISBAIJ,
       MatIncreaseOverlap_MPISBAIJ,
       MatGetValues_MPISBAIJ,
       MatCopy_MPISBAIJ,
/*45*/ 0,
       MatScale_MPISBAIJ,
       0,
       0,
       0,
/*50*/ 0,
       0,
       0,
       0,
       0,
/*55*/ 0,
       0,
       MatSetUnfactored_MPISBAIJ,
       0,
       MatSetValuesBlocked_MPISBAIJ,
/*60*/ 0,
       0,
       0,
       0,
       0,
/*65*/ 0,
       0,
       0,
       0,
       0,
/*70*/ MatGetRowMaxAbs_MPISBAIJ,
       0,
       0,
       0,
       0,
/*75*/ 0,
       0,
       0,
       0,
       0,
/*80*/ 0,
       0,
       0,
       0,
       MatLoad_MPISBAIJ,
/*85*/ 0,
       0,
       0,
       0,
       0,
/*90*/ 0,
       0,
       0,
       0,
       0,
/*95*/ 0,
       0,
       0,
       0,
       0,
/*100*/0,
       0,
       0,
       0,
       0,
/*105*/0,
       MatRealPart_MPISBAIJ,
       MatImaginaryPart_MPISBAIJ,
       MatGetRowUpperTriangular_MPISBAIJ,
       MatRestoreRowUpperTriangular_MPISBAIJ
};

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
{
  PetscFunctionBegin;
  *a      = ((Mat_MPISBAIJ*)A->data)->A;
  *iscopy = PETSC_FALSE;
  PetscFunctionReturn(0);
}
EXTERN_C_END
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
{
  Mat_MPISBAIJ   *b;
  PetscErrorCode ierr;
  PetscInt       i,mbs,Mbs;

  PetscFunctionBegin;
  ierr = PetscOptionsBegin(B->comm,B->prefix,"Options for MPISBAIJ matrix","Mat");CHKERRQ(ierr);
    ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscOptionsEnd();CHKERRQ(ierr);

  if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
  if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
  if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
  if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
  if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);

  B->rmap.bs = B->cmap.bs = bs;
  ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr);
  ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr);

  if (d_nnz) {
    for (i=0; i<B->rmap.n/bs; i++) {
      if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
    }
  }
  if (o_nnz) {
    for (i=0; i<B->rmap.n/bs; i++) {
      if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
    }
  }
  B->preallocated = PETSC_TRUE;

  b   = (Mat_MPISBAIJ*)B->data;
  mbs = B->rmap.n/bs;
  Mbs = B->rmap.N/bs;
  if (mbs*bs != B->rmap.n) {
    SETERRQ2(PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap.n,bs);
  }

  B->rmap.bs = bs;
  b->bs2     = bs*bs;
  b->mbs     = mbs;
  b->nbs     = mbs;
  b->Mbs     = Mbs;
  b->Nbs     = Mbs;

  for (i=0; i<=b->size; i++) {
    b->rangebs[i] = B->rmap.range[i]/bs;
  }
  b->rstartbs = B->rmap.rstart/bs;
  b->rendbs   = B->rmap.rend/bs;
  b->cstartbs = B->cmap.rstart/bs;
  b->cendbs   = B->cmap.rend/bs;

  ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
  ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr);
  ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
  ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);

  ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
  ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr);
  ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
  ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);

  /* build cache for off-processor entries formed */
  ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*MC
   MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
   based on the block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.

   Options Database Keys:
. -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPISBAIJ()
M*/
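/*
   A minimal creation sketch for this type (the variable names m, M, bs, d_nz,
   o_nz are illustrative):
.vb
      Mat A;
      ierr = MatCreate(comm,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,m,m,M,M);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
      ierr = MatMPISBAIJSetPreallocation(A,bs,d_nz,PETSC_NULL,o_nz,PETSC_NULL);CHKERRQ(ierr);
.ve
   Only entries in the upper triangular portion (per block row) need to be set.
*/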
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPISBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPISBAIJ(Mat B)
{
  Mat_MPISBAIJ   *b;
  PetscErrorCode ierr;
  PetscTruth     flg;

  PetscFunctionBegin;
  ierr    = PetscNew(Mat_MPISBAIJ,&b);CHKERRQ(ierr);
  B->data = (void*)b;
  ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  B->ops->destroy = MatDestroy_MPISBAIJ;
  B->ops->view    = MatView_MPISBAIJ;
  B->mapping      = 0;
  B->factor       = 0;
  B->assembled    = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  ierr = PetscMalloc((b->size+2)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);

  /* build cache for off-processor entries formed */
  ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash  = PETSC_FALSE;
  b->colmap      = PETSC_NULL;
  b->garray      = PETSC_NULL;
  b->roworiented = PETSC_TRUE;

#if defined(PETSC_USE_MAT_SINGLE)
  /* stuff for MatSetValues_XXX in single precision */
  b->setvalueslen  = 0;
  b->setvaluescopy = PETSC_NULL;
#endif

  /* stuff used in block assembly */
  b->barray = 0;

  /* stuff used for matrix-vector multiply */
  b->lvec    = 0;
  b->Mvctx   = 0;
  b->slvec0  = 0;
  b->slvec0b = 0;
  b->slvec1  = 0;
  b->slvec1a = 0;
  b->slvec1b = 0;
  b->sMvctx  = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = 0;
  b->hd           = 0;
  b->ht_size      = 0;
  b->ht_flag      = PETSC_FALSE;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  b->in_loc = 0;
  b->v_loc  = 0;
  b->n_loc  = 0;
  ierr = PetscOptionsBegin(B->comm,PETSC_NULL,"Options for MPISBAIJ matrix","Mat");CHKERRQ(ierr);
    ierr = PetscOptionsTruth("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
    if (flg) {
      PetscReal fact = 1.39;
      ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
      ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr);
      if (fact <= 1.0) fact = 1.39;
      ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
      ierr = PetscInfo1(0,"Hash table factor used %5.2f\n",fact);CHKERRQ(ierr);
    }
  ierr = PetscOptionsEnd();CHKERRQ(ierr);

  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                     "MatStoreValues_MPISBAIJ",
                                     MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                     "MatRetrieveValues_MPISBAIJ",
                                     MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
                                     "MatGetDiagonalBlock_MPISBAIJ",
                                     MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
                                     "MatMPISBAIJSetPreallocation_MPISBAIJ",
                                     MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);

  B->symmetric                  = PETSC_TRUE;
  B->structurally_symmetric     = PETSC_TRUE;
  B->symmetric_set              = PETSC_TRUE;
  B->structurally_symmetric_set = PETSC_TRUE;
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
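/*
   The hash-table assembly path registered above is normally enabled from the
   command line; a hedged example (the executable name is illustrative, the
   option name is the one registered in MatCreate_MPISBAIJ() above):
.vb
      mpirun -np 4 ./myprog -mat_type mpisbaij -mat_use_hash_table
.ve
*/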
/*MC
   MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.

   This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
   and MATMPISBAIJ otherwise.

   Options Database Keys:
. -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPISBAIJ(), MATSEQSBAIJ, MATMPISBAIJ
M*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_SBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SBAIJ(Mat A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
  if (size == 1) {
    ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMPISBAIJSetPreallocation"
/*@C
   MatMPISBAIJSetPreallocation - For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on Mat

   Input Parameters:
+  A - the matrix
.  bs - size of block
.  d_nz - number of block nonzeros per block row in the diagonal portion of the local
          submatrix (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the upper triangular and diagonal part of the diagonal portion of the
           local submatrix (possibly different for each block row), or PETSC_NULL.
           You must leave room for the diagonal entry even if it is zero.
.  o_nz - number of block nonzeros per block row in the off-diagonal portion of the local
          submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row), or PETSC_NULL.

   Options Database Keys:
.   -mat_no_unroll - uses code that does not unroll the loops in the
                     block calculations (much slower)
.   -mat_block_size - size of the blocks to use

   Notes:
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   If the *_nnz parameter is given then the *_nz parameter is ignored.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix.  In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          -------------------
   row 3  |  o o o d d d o o o o o o
   row 4  |  o o o d d d o o o o o o
   row 5  |  o o o d d d o o o o o o
          -------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d matrix is stored in
   MATSEQSBAIJ format and the o submatrix in MATSEQBAIJ format.

   Now d_nz should indicate the number of block nonzeros per row in the upper triangular
   plus the diagonal part of the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.  For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

   Level: intermediate

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
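/*
   Worked sketch for the storage figure above (the array values are
   illustrative): with bs=1 and locally owned rows 3..5, suppose each row has
   nonzeros only in columns 3..6 of the upper triangle.  Row 3 then has 3
   entries in the d block (columns 3,4,5) and 1 in the o block (column 6);
   row 4 has 2 and 1; row 5 has 1 and 1:
.vb
      PetscInt d_nnz[] = {3,2,1};   /* upper-triangular counts in the d block */
      PetscInt o_nnz[] = {1,1,1};   /* counts in the o block */
      ierr = MatMPISBAIJSetPreallocation(A,1,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve
*/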
#undef __FUNCT__
#define __FUNCT__ "MatCreateMPISBAIJ"
/*@C
   MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
   (block compressed row).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs - size of block
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
       This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  d_nz - number of block nonzeros per block row in the diagonal portion of the local
          submatrix (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the upper triangular portion of the diagonal portion of the local
           submatrix (possibly different for each block row), or PETSC_NULL.
           You must leave room for the diagonal entry even if it is zero.
.  o_nz - number of block nonzeros per block row in the off-diagonal portion of the local
          submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row), or PETSC_NULL.

   Output Parameter:
.  A - the matrix

   Options Database Keys:
.   -mat_no_unroll - uses code that does not unroll the loops in the
                     block calculations (much slower)
.   -mat_block_size - size of the blocks to use
.   -mat_mpi - use the parallel matrix data structures even on one processor
               (defaults to using SeqSBAIJ format on one processor)

   Notes:
   The number of rows and columns must be divisible by the blocksize.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   If the *_nnz parameter is given then the *_nz parameter is ignored.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix.  In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          -------------------
   row 3  |  o o o d d d o o o o o o
   row 4  |  o o o d d d o o o o o o
   row 5  |  o o o d d d o o o o o o
          -------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d matrix is stored in
   MATSEQSBAIJ format and the o submatrix in MATSEQBAIJ format.

   Now d_nz should indicate the number of block nonzeros per row in the upper triangular
   plus the diagonal part of the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.  For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

   Level: intermediate

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPISBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
    ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
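/*
   Creation-plus-assembly sketch using the routine above (the sizes, the block
   indices ib,jb and the values array v are illustrative):
.vb
      Mat A;
      ierr = MatCreateMPISBAIJ(comm,2,PETSC_DECIDE,PETSC_DECIDE,M,M,
                               3,PETSC_NULL,1,PETSC_NULL,&A);CHKERRQ(ierr);
      /* set only block entries with block column >= block row */
      ierr = MatSetValuesBlocked(A,1,&ib,1,&jb,v,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
.ve
*/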
#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPISBAIJ"
static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
  PetscErrorCode ierr;
  PetscInt       len=0,nt,bs=matin->rmap.bs,mbs=oldmat->mbs;
  PetscScalar    *array;

  PetscFunctionBegin;
  *newmat = 0;
  ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr);
  ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
  ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
  ierr = PetscMapCopy(matin->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscMapCopy(matin->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr);

  mat->factor       = matin->factor;
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;

  a = (Mat_MPISBAIJ*)mat->data;
  a->bs2 = oldmat->bs2;
  a->mbs = oldmat->mbs;
  a->nbs = oldmat->nbs;
  a->Mbs = oldmat->Mbs;
  a->Nbs = oldmat->Nbs;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;
  a->barray       = 0;
  a->rstartbs     = oldmat->rstartbs;
  a->rendbs       = oldmat->rendbs;
  a->cstartbs     = oldmat->cstartbs;
  a->cendbs       = oldmat->cendbs;

  /* hash table stuff */
  a->ht           = 0;
  a->hd           = 0;
  a->ht_size      = 0;
  a->ht_flag      = oldmat->ht_flag;
  a->ht_fact      = oldmat->ht_fact;
  a->ht_total_ct  = 0;
  a->ht_insert_ct = 0;

  ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
  ierr = MatStashCreate_Private(matin->comm,matin->rmap.bs,&mat->bstash);CHKERRQ(ierr);
  if (oldmat->colmap) {
#if defined (PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;

  if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
    ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
  } else a->garray = 0;

  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);

  ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
  ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);

  ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
  ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
  ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->slvec0b);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->slvec1a);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->slvec1b);CHKERRQ(ierr);

  /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
  ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
  a->sMvctx = oldmat->sMvctx;
  ierr = PetscLogObjectParent(mat,a->sMvctx);CHKERRQ(ierr);

  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
  ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}
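/*
   Duplication sketch for the routine above:
.vb
      Mat B;
      ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
.ve
   Note the "lazy trick" in the code: the scatter context sMvctx is
   reference-counted rather than copied, so A and B share one context.
*/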
#include "petscsys.h"

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPISBAIJ"
PetscErrorCode MatLoad_MPISBAIJ(PetscViewer viewer,MatType type,Mat *newmat)
{
  Mat            A;
  PetscErrorCode ierr;
  PetscInt       i,nz,j,rstart,rend;
  PetscScalar    *vals,*buf;
  MPI_Comm       comm = ((PetscObject)viewer)->comm;
  MPI_Status     status;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,*locrowlens;
  PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
  PetscInt       *procsnz = 0,jj,*mycols,*ibuf;
  PetscInt       bs=1,Mbs,mbs,extra_rows;
  PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
  PetscInt       dcount,kmax,k,nzcount,tmp;
  int            fd;

  PetscFunctionBegin;
  ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPISBAIJ matrix","Mat");CHKERRQ(ierr);
    ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscOptionsEnd();CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
    if (header[3] < 0) {
      SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
    }
  }

  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];

  if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");

  /*
     This code adds extra rows to make sure the number of rows is
     divisible by the blocksize
  */
  Mbs        = M/bs;
  extra_rows = bs - M + bs*Mbs;
  if (extra_rows == bs) extra_rows = 0;
  else                  Mbs++;
  if (extra_rows && !rank) {
    ierr = PetscInfo(0,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
  }

  /* determine ownership of all rows */
  mbs = Mbs/size + ((Mbs % size) > rank);
  m   = mbs*bs;
  ierr = PetscMalloc(2*(size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr);
  browners = rowners + size + 1;
  ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
  for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscMPIInt),&locrowlens);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
    ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {   /* procs[0] */
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]*bs; j<rowners[i+1]*bs; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);
    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz     = procsnz[0];
    ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
    mycols = ibuf;
    if (size == 1) nz -= extra_rows;
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
    if (size == 1) for (i=0; i<extra_rows; i++) { mycols[nz+i] = M+i; }

    /* read in the column indices for all other processors (except the last) and ship them off */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    /* read in the stuff for the last proc */
    if (size != 1) {
      nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
      ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {  /* procs[i], i>0 */
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += locrowlens[i];
    }
    ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
    mycols = ibuf;
    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* loop over local rows, determining number of off-diagonal entries */
  ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
  odlens  = dlens + (rend-rstart);
  ierr    = PetscMalloc(3*Mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
  ierr    = PetscMemzero(mask,3*Mbs*sizeof(PetscInt));CHKERRQ(ierr);
  masked1 = mask + Mbs;
  masked2 = masked1 + Mbs;
  rowcount = 0; nzcount = 0;
  for (i=0; i<mbs; i++) {
    dcount  = 0;
    odcount = 0;
    for (j=0; j<bs; j++) {
      kmax = locrowlens[rowcount];
      for (k=0; k<kmax; k++) {
        tmp = mycols[nzcount++]/bs;   /* block column index */
        if (!mask[tmp]) {
          mask[tmp] = 1;
          if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;  /* entry in off-diagonal portion */
          else masked1[dcount++] = tmp;                               /* entry in diagonal portion */
        }
      }
      rowcount++;
    }

    dlens[i]  = dcount;   /* d_nnz[i] */
    odlens[i] = odcount;  /* o_nnz[i] */

    /* zero out the mask elements we set */
    for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
    for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
  }
  /* create our matrix */
  ierr = MatCreate(comm,&A);CHKERRQ(ierr);
  ierr = MatSetSizes(A,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetType(A,type);CHKERRQ(ierr);
  ierr = MatMPISBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
  ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);

  if (!rank) {
    ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
    /* read in my part of the matrix numerical values */
    nz     = procsnz[0];
    vals   = buf;
    mycols = ibuf;
    if (size == 1) nz -= extra_rows;
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
    if (size == 1) for (i=0; i<extra_rows; i++) { vals[nz+i] = 1.0; }

    /* insert into matrix */
    jj = rstart*bs;
    for (i=0; i<m; i++) {
      ierr    = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }

    /* read in the values for all other processors (except the last one) and ship them off */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    /* the last proc */
    if (size != 1) {
      nz   = procsnz[size-1] - extra_rows;
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
      ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);

    /* receive message of values */
    vals   = buf;
    mycols = ibuf;
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj = rstart*bs;
    for (i=0; i<m; i++) {
      ierr    = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
  }

  ierr = PetscFree(locrowlens);CHKERRQ(ierr);
  ierr = PetscFree(buf);CHKERRQ(ierr);
  ierr = PetscFree(ibuf);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = PetscFree(dlens);CHKERRQ(ierr);
  ierr = PetscFree(mask);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = A;
  PetscFunctionReturn(0);
}
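/*
   Loading sketch for the routine above, using the MatLoad(viewer,type,&mat)
   calling sequence of this PETSc version (the file name is illustrative):
.vb
      PetscViewer viewer;
      Mat         A;
      ierr = PetscViewerBinaryOpen(comm,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
      ierr = MatLoad(viewer,MATMPISBAIJ,&A);CHKERRQ(ierr);
      ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
.ve
   Run with -matload_block_size <bs> to select the block size used above.
*/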
#undef __FUNCT__
#define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
/*XXXXX@
   MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the hash table.

   Collective on Mat

   Input Parameters:
+  mat - the matrix
-  fact - factor

   Level: advanced

   Notes:
   This can also be set by the command line option: -mat_use_hash_table fact

.keywords: matrix, hashtable, factor, HT

.seealso: MatSetOption()
@XXXXX*/

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ"
PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
  PetscReal      atmp;
  PetscReal      *work,*svalues,*rvalues;
  PetscErrorCode ierr;
  PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
  PetscMPIInt    rank,size;
  PetscInt       *rowners_bs,dest,count,source;
  PetscScalar    *va;
  MatScalar      *ba;
  MPI_Status     stat;

  PetscFunctionBegin;
  if (idx) SETERRQ(PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
  ierr = MatGetRowMaxAbs(a->A,v,PETSC_NULL);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);

  ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);

  bs  = A->rmap.bs;
  mbs = a->mbs;
  Mbs = a->Mbs;
  ba  = b->a;
  bi  = b->i;
  bj  = b->j;

  /* find ownerships */
  rowners_bs = A->rmap.range;

  /* each proc creates an array to be distributed */
  ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr);
  ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);

  /* row_max for B */
  if (rank != size-1) {
    for (i=0; i<mbs; i++) {
      ncols = bi[1] - bi[0]; bi++;
      brow  = bs*i;
      for (j=0; j<ncols; j++) {
        bcol = bs*(*bj);
        for (kcol=0; kcol<bs; kcol++) {
          col  = bcol + kcol;          /* local col index */
          col += rowners_bs[rank+1];   /* global col index */
          for (krow=0; krow<bs; krow++) {
            atmp = PetscAbsScalar(*ba); ba++;
            row  = brow + krow;        /* local row index */
            if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
            if (work[col] < atmp) work[col] = atmp;
          }
        }
        bj++;
      }
    }

    /* send values to their owners */
    for (dest=rank+1; dest<size; dest++) {
      svalues = work + rowners_bs[dest];
      count   = rowners_bs[dest+1] - rowners_bs[dest];
      ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,A->comm);CHKERRQ(ierr);
    }
  }

  /* receive values */
  if (rank) {
    rvalues = work;
    count   = rowners_bs[rank+1] - rowners_bs[rank];
    for (source=0; source<rank; source++) {
      ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,A->comm,&stat);CHKERRQ(ierr);
      /* process values */
      for (i=0; i<count; i++) {
        if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
      }
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = PetscFree(work);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
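/*
   Because only the upper triangle is stored, the row maxima above require the
   cross-process reduction coded in the routine; callers simply do
.vb
      ierr = MatGetRowMaxAbs(A,v,PETSC_NULL);CHKERRQ(ierr);
.ve
   (passing a non-null idx[] is rejected above with PETSC_ERR_SUP).
*/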
#undef __FUNCT__
#define __FUNCT__ "MatRelax_MPISBAIJ"
PetscErrorCode MatRelax_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
  PetscErrorCode ierr;
  PetscInt       mbs=mat->mbs,bs=matin->rmap.bs;
  PetscScalar    *x,*b,*ptr,zero=0.0;
  Vec            bb1;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
  if (bs > 1) SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
    while (its--) {
      /* lower triangular part: slvec0b = - B^T*xx */
      ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);

      /* copy xx into slvec0a */
      ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
      ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
      ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
      ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);

      ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);

      /* copy bb into slvec1a */
      ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
      ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
      ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
      ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);

      /* set slvec1b = 0 */
      ierr = VecSet(mat->slvec1b,zero);CHKERRQ(ierr);

      ierr = VecScatterBegin(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
      ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
      ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);

      /* upper triangular part: bb1 = bb1 - B*x */
      ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);

      /* local diagonal sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
    ierr = VecDestroy(bb1);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
  }
  PetscFunctionReturn(0);
}
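/*
   SSOR sketch for the relaxation above (omega, its and lits are illustrative);
   only SOR_LOCAL_SYMMETRIC_SWEEP with bs = 1 is supported:
.vb
      ierr = MatRelax(A,b,1.0,SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS,
                      0.0,its,lits,x);CHKERRQ(ierr);
.ve
*/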
#undef __FUNCT__
#define __FUNCT__ "MatRelax_MPISBAIJ_2comm"
PetscErrorCode MatRelax_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            lvec1,bb1;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
  if (matin->rmap.bs > 1) SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* lower triangular part: bb1 = bb - B^T*xx */
      ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
      ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);

      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
      ierr = VecScatterBegin(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);

      /* upper triangular part: bb1 = bb1 - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);

      ierr = VecScatterEnd(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);

      /* diagonal sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
    ierr = VecDestroy(lvec1);CHKERRQ(ierr);
    ierr = VecDestroy(bb1);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
  }
  PetscFunctionReturn(0);
}