1 /*$Id: mpisbaij.c,v 1.6 2000/07/18 16:54:44 hzhang Exp hzhang $*/ 2 3 #include "src/mat/impls/baij/mpi/mpibaij.h" 4 #include "src/vec/vecimpl.h" 5 #include "mpisbaij.h" 6 #include "src/mat/impls/sbaij/seq/sbaij.h" 7 8 extern int MatSetUpMultiply_MPISBAIJ(Mat); 9 extern int DisAssemble_MPISBAIJ(Mat); 10 extern int MatIncreaseOverlap_MPISBAIJ(Mat,int,IS *,int); 11 extern int MatGetSubMatrices_MPISBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **); 12 extern int MatGetValues_SeqSBAIJ(Mat,int,int *,int,int *,Scalar *); 13 extern int MatSetValues_SeqSBAIJ(Mat,int,int *,int,int *,Scalar *,InsertMode); 14 extern int MatSetValuesBlocked_SeqSBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 15 extern int MatGetRow_SeqSBAIJ(Mat,int,int*,int**,Scalar**); 16 extern int MatRestoreRow_SeqSBAIJ(Mat,int,int*,int**,Scalar**); 17 extern int MatPrintHelp_SeqSBAIJ(Mat); 18 extern int MatZeroRows_SeqSBAIJ(Mat,IS,Scalar*); 19 20 /* UGLY, ugly, ugly 21 When MatScalar == Scalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does 22 not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and 23 inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ() 24 converts the entries into single precision and then calls ..._MatScalar() to put them 25 into the single precision data structures. 26 */ 27 #if defined(PETSC_USE_MAT_SINGLE) 28 extern int MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 29 extern int MatSetValues_MPISBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 30 extern int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 31 extern int MatSetValues_MPISBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 32 extern int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 33 #else 34 #define MatSetValuesBlocked_SeqSBAIJ_MatScalar MatSetValuesBlocked_SeqSBAIJ 35 #define MatSetValues_MPISBAIJ_MatScalar MatSetValues_MPISBAIJ 36 #define MatSetValuesBlocked_MPISBAIJ_MatScalar MatSetValuesBlocked_MPISBAIJ 37 #define MatSetValues_MPISBAIJ_HT_MatScalar MatSetValues_MPISBAIJ_HT 38 #define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar MatSetValuesBlocked_MPISBAIJ_HT 39 #endif 40 41 EXTERN_C_BEGIN 42 #undef __FUNC__ 43 #define __FUNC__ /*<a name="MatStoreValues_MPIBAIJ"></a>*/"MatStoreValues_MPIBAIJ" 44 int MatStoreValues_MPISBAIJ(Mat mat) 45 { 46 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 47 int ierr; 48 49 PetscFunctionBegin; 50 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 51 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 52 PetscFunctionReturn(0); 53 } 54 EXTERN_C_END 55 56 EXTERN_C_BEGIN 57 #undef __FUNC__ 58 #define __FUNC__ /*<a name="MatRetrieveValues_MPIBAIJ"></a>*/"MatRetrieveValues_MPIBAIJ" 59 int MatRetrieveValues_MPISBAIJ(Mat mat) 60 { 61 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 62 int ierr; 63 64 PetscFunctionBegin; 65 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 66 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 67 PetscFunctionReturn(0); 68 } 69 EXTERN_C_END 70 71 /* 72 Local utility routine that creates a mapping from the global column 73 number to the local number in the off-diagonal part of the local 74 storage of the matrix. This is done in a non scable way since the 75 length of colmap equals the global matrix length. 76 */ 77 #undef __FUNC__ 78 #define __FUNC__ /*<a name="CreateColmap_MPISBAIJ_Private"></a>*/"CreateColmap_MPISBAIJ_Private" 79 static int CreateColmap_MPISBAIJ_Private(Mat mat) 80 { 81 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 82 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 83 int nbs = B->nbs,i,bs=B->bs,ierr; 84 85 PetscFunctionBegin; 86 #if defined (PETSC_USE_CTABLE) 87 ierr = PetscTableCreate(baij->nbs/5,&baij->colmap);CHKERRQ(ierr); 88 for (i=0; i<nbs; i++){ 89 ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr); 90 } 91 #else 92 baij->colmap = (int*)PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap); 93 PLogObjectMemory(mat,baij->Nbs*sizeof(int)); 94 ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));CHKERRQ(ierr); 95 for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1; 96 #endif 97 PetscFunctionReturn(0); 98 } 99 100 #define CHUNKSIZE 10 101 102 #define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \ 103 { \ 104 \ 105 brow = row/bs; \ 106 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 107 rmax = aimax[brow]; nrow = ailen[brow]; \ 108 bcol = col/bs; \ 109 ridx = row % bs; cidx = col % bs; \ 110 low = 0; high = nrow; \ 111 while (high-low > 3) { \ 112 t = (low+high)/2; \ 113 if (rp[t] > bcol) high = t; \ 114 else low = t; \ 115 } \ 116 for (_i=low; _i<high; _i++) { \ 117 if (rp[_i] > bcol) break; \ 118 if (rp[_i] == bcol) { \ 119 bap = ap + bs2*_i + bs*cidx + ridx; \ 120 if (addv == ADD_VALUES) *bap += value; \ 121 else *bap = value; \ 122 goto a_noinsert; \ 123 } \ 124 } \ 125 if (a->nonew == 1) goto a_noinsert; \ 126 else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 127 if (nrow >= rmax) { \ 128 /* there is no extra room in row, therefore enlarge */ \ 129 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 130 MatScalar *new_a; \ 131 \ 132 if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 133 \ 134 /* malloc new storage space */ \ 135 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \ 136 new_a = (MatScalar*)PetscMalloc(len);CHKPTRQ(new_a); \ 137 new_j = (int*)(new_a + bs2*new_nz); \ 138 new_i = new_j + new_nz; \ 139 \ 140 /* copy over old data into new slots */ \ 141 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \ 142 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 143 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 144 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \ 145 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \ 146 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 147 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));CHKERRQ(ierr); \ 148 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \ 149 aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 150 /* free up old matrix storage */ \ 151 ierr = PetscFree(a->a);CHKERRQ(ierr); \ 152 if (!a->singlemalloc) { \ 153 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 154 ierr = PetscFree(a->j);CHKERRQ(ierr);\ 155 } \ 156 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 157 a->singlemalloc = PETSC_TRUE; \ 158 \ 159 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 160 rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \ 161 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \ 162 a->s_maxnz += bs2*CHUNKSIZE; \ 163 a->reallocs++; \ 164 a->s_nz++; \ 165 } \ 166 N = nrow++ - 1; \ 167 /* shift up all the later entries in this row */ \ 168 for (ii=N; ii>=_i; ii--) { \ 169 rp[ii+1] = rp[ii]; \ 170 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 171 } \ 172 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 173 rp[_i] = bcol; \ 174 ap[bs2*_i + bs*cidx + ridx] = value; \ 175 a_noinsert:; \ 176 ailen[brow] = nrow; \ 177 } 178 #ifndef MatSetValues_SeqBAIJ_B_Private 179 #define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \ 180 { \ 181 brow = row/bs; \ 182 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 183 rmax = bimax[brow]; nrow = bilen[brow]; \ 184 bcol = col/bs; \ 185 ridx = row % bs; cidx = col % bs; \ 186 low = 0; high = nrow; \ 187 while (high-low > 3) { \ 188 t = (low+high)/2; \ 189 if (rp[t] > bcol) high = t; \ 190 else low = t; \ 191 } \ 192 for (_i=low; _i<high; _i++) { \ 193 if (rp[_i] > bcol) break; \ 194 if (rp[_i] == bcol) { \ 195 bap = ap + bs2*_i + bs*cidx + ridx; \ 196 if (addv == ADD_VALUES) *bap += value; \ 197 else *bap = value; \ 198 goto b_noinsert; \ 199 } \ 200 } \ 201 if (b->nonew == 1) goto b_noinsert; \ 202 else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 203 if (nrow >= rmax) { \ 204 /* there is no extra room in row, therefore enlarge */ \ 205 int new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 206 MatScalar *new_a; \ 207 \ 208 if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 209 \ 210 /* malloc new storage space */ \ 211 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \ 212 new_a = (MatScalar*)PetscMalloc(len);CHKPTRQ(new_a); \ 213 new_j = (int*)(new_a + bs2*new_nz); \ 214 new_i = new_j + new_nz; \ 215 \ 216 /* copy over old data into new slots */ \ 217 for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \ 218 for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 219 ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 220 len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \ 221 ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \ 222 ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 223 ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \ 224 ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \ 225 ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 226 /* free up old matrix storage */ \ 227 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 228 if (!b->singlemalloc) { \ 229 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 230 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 231 } \ 232 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 233 b->singlemalloc = PETSC_TRUE; \ 234 \ 235 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 236 rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \ 237 PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \ 238 b->maxnz += bs2*CHUNKSIZE; \ 239 b->reallocs++; \ 240 b->nz++; \ 241 } \ 242 N = nrow++ - 1; \ 243 /* shift up all the later entries in this row */ \ 244 for (ii=N; ii>=_i; ii--) { \ 245 rp[ii+1] = rp[ii]; \ 246 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 247 } \ 248 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 249 rp[_i] = bcol; \ 250 ap[bs2*_i + bs*cidx + ridx] = value; \ 251 b_noinsert:; \ 252 bilen[brow] = nrow; \ 253 } 254 #endif 255 256 #if defined(PETSC_USE_MAT_SINGLE) 257 #undef __FUNC__ 258 #define __FUNC__ /*<a name="MatSetValues_MPISBAIJ"></a>*/"MatSetValues_MPISBAIJ" 259 int MatSetValues_MPISBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 260 { 261 Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data; 262 int ierr,i,N = m*n; 263 MatScalar *vsingle; 264 265 PetscFunctionBegin; 266 if (N > b->setvalueslen) { 267 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 268 b->setvaluescopy = (MatScalar*)PetscMalloc(N*sizeof(MatScalar));CHKPTRQ(b->setvaluescopy); 269 b->setvalueslen = N; 270 } 271 vsingle = b->setvaluescopy; 272 273 for (i=0; i<N; i++) { 274 vsingle[i] = v[i]; 275 } 276 ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 277 PetscFunctionReturn(0); 278 } 279 280 #undef __FUNC__ 281 #define __FUNC__ /*<a name="MatSetValuesBlocked_MPISBAIJ"></a>*/"MatSetValuesBlocked_MPISBAIJ" 282 int MatSetValuesBlocked_MPISBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 283 { 284 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 285 int ierr,i,N = m*n*b->bs2; 286 MatScalar *vsingle; 287 288 PetscFunctionBegin; 289 if (N > b->setvalueslen) { 290 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 291 b->setvaluescopy = (MatScalar*)PetscMalloc(N*sizeof(MatScalar));CHKPTRQ(b->setvaluescopy); 292 b->setvalueslen = N; 293 } 294 vsingle = b->setvaluescopy; 295 for (i=0; i<N; i++) { 296 vsingle[i] = v[i]; 297 } 298 ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 299 PetscFunctionReturn(0); 300 } 301 302 #undef __FUNC__ 303 #define __FUNC__ /*<a name="MatSetValues_MPISBAIJ_HT"></a>*/"MatSetValues_MPISBAIJ_HT" 304 int MatSetValues_MPISBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 305 { 306 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 307 int ierr,i,N = m*n; 308 MatScalar *vsingle; 309 310 PetscFunctionBegin; 311 if (N > b->setvalueslen) { 312 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 313 b->setvaluescopy = (MatScalar*)PetscMalloc(N*sizeof(MatScalar));CHKPTRQ(b->setvaluescopy); 314 b->setvalueslen = N; 315 } 316 vsingle = b->setvaluescopy; 317 for (i=0; i<N; i++) { 318 vsingle[i] = v[i]; 319 } 320 ierr = MatSetValues_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 321 PetscFunctionReturn(0); 322 } 323 324 #undef __FUNC__ 325 #define __FUNC__ /*<a name="MatSetValuesBlocked_MPISBAIJ_HT"></a>*/"MatSetValuesBlocked_MPISBAIJ_HT" 326 int MatSetValuesBlocked_MPISBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 327 { 328 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 329 int ierr,i,N = m*n*b->bs2; 330 MatScalar *vsingle; 331 332 PetscFunctionBegin; 333 if (N > b->setvalueslen) { 334 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 335 b->setvaluescopy = (MatScalar*)PetscMalloc(N*sizeof(MatScalar));CHKPTRQ(b->setvaluescopy); 336 b->setvalueslen = N; 337 } 338 vsingle = b->setvaluescopy; 339 for (i=0; i<N; i++) { 340 vsingle[i] = v[i]; 341 } 342 ierr = MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 343 PetscFunctionReturn(0); 344 } 345 #endif 346 347 /* Only add/insert a(i,j) with i<=j (blocks). 348 Any a(i,j) with i>j input by user is ingored. 349 */ 350 #undef __FUNC__ 351 #define __FUNC__ /*<a name="MatSetValues_MPIBAIJ"></a>*/"MatSetValues_MPIBAIJ" 352 int MatSetValues_MPISBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv) 353 { 354 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 355 MatScalar value; 356 int ierr,i,j,row,col; 357 int roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ; 358 int rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs; 359 int cend_orig=baij->cend_bs,bs=baij->bs; 360 361 /* Some Variables required in the macro */ 362 Mat A = baij->A; 363 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data; 364 int *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 365 MatScalar *aa=a->a; 366 367 Mat B = baij->B; 368 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 369 int *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 370 MatScalar *ba=b->a; 371 372 int *rp,ii,nrow,_i,rmax,N,brow,bcol; 373 int low,high,t,ridx,cidx,bs2=a->bs2; 374 MatScalar *ap,*bap; 375 376 /* for stash */ 377 int n_loc, *in_loc=0; 378 MatScalar *v_loc=0; 379 380 PetscFunctionBegin; 381 382 if(!baij->donotstash){ 383 in_loc = (int*)PetscMalloc(n*sizeof(int)); CHKPTRQ(in_loc); 384 v_loc = (MatScalar*)PetscMalloc(n*sizeof(MatScalar)); CHKPTRQ(v_loc); 385 } 386 387 for (i=0; i<m; i++) { 388 if (im[i] < 0) continue; 389 #if defined(PETSC_USE_BOPT_g) 390 if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 391 #endif 392 if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */ 393 row = im[i] - rstart_orig; /* local row index */ 394 for (j=0; j<n; j++) { 395 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 396 if (in[j] >= cstart_orig && in[j] < cend_orig){ /* diag entry (A) */ 397 col = in[j] - cstart_orig; /* local col index */ 398 brow = row/bs; bcol = col/bs; 399 if (brow > bcol) continue; /* ignore lower triangular blocks of A */ 400 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 401 MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv); 402 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 403 } else if (in[j] < 0) continue; 404 #if defined(PETSC_USE_BOPT_g) 405 else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Col too large");} 406 #endif 407 else { /* off-diag entry (B) */ 408 if (mat->was_assembled) { 409 if (!baij->colmap) { 410 ierr = CreateColmap_MPISBAIJ_Private(mat);CHKERRQ(ierr); 411 } 412 #if defined (PETSC_USE_CTABLE) 413 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 414 col = col - 1 + in[j]%bs; 415 #else 416 col = baij->colmap[in[j]/bs] - 1 + in[j]%bs; 417 #endif 418 if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) { 419 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 420 col = in[j]; 421 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 422 B = baij->B; 423 b = (Mat_SeqBAIJ*)(B)->data; 424 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 425 ba=b->a; 426 } 427 } else col = in[j]; 428 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 429 MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv); 430 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 431 } 432 } 433 } else { /* off processor entry */ 434 if (!baij->donotstash) { 435 n_loc = 0; 436 for (j=0; j<n; j++){ 437 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 438 in_loc[n_loc] = in[j]; 439 if (roworiented) { 440 v_loc[n_loc] = v[i*n+j]; 441 } else { 442 v_loc[n_loc] = v[j*m+i]; 443 } 444 n_loc++; 445 } 446 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr); 447 } 448 } 449 } 450 451 if(!baij->donotstash){ 452 ierr = PetscFree(in_loc);CHKERRQ(ierr); 453 ierr = PetscFree(v_loc);CHKERRQ(ierr); 454 } 455 PetscFunctionReturn(0); 456 } 457 458 #undef __FUNC__ 459 #define __FUNC__ /*<a name="MatSetValuesBlocked_MPISBAIJ"></a>*/"MatSetValuesBlocked_MPISBAIJ" 460 int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv) 461 { 462 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 463 MatScalar *value,*barray=baij->barray; 464 int ierr,i,j,ii,jj,row,col; 465 int roworiented = baij->roworiented,rstart=baij->rstart ; 466 int rend=baij->rend,cstart=baij->cstart,stepval; 467 int cend=baij->cend,bs=baij->bs,bs2=baij->bs2; 468 469 PetscFunctionBegin; 470 if(!barray) { 471 baij->barray = barray = (MatScalar*)PetscMalloc(bs2*sizeof(MatScalar));CHKPTRQ(barray); 472 } 473 474 if (roworiented) { 475 stepval = (n-1)*bs; 476 } else { 477 stepval = (m-1)*bs; 478 } 479 for (i=0; i<m; i++) { 480 if (im[i] < 0) continue; 481 #if defined(PETSC_USE_BOPT_g) 482 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large, row %d max %d",im[i],baij->Mbs); 483 #endif 484 if (im[i] >= rstart && im[i] < rend) { 485 row = im[i] - rstart; 486 for (j=0; j<n; j++) { 487 /* If NumCol = 1 then a copy is not required */ 488 if ((roworiented) && (n == 1)) { 489 barray = v + i*bs2; 490 } else if((!roworiented) && (m == 1)) { 491 barray = v + j*bs2; 492 } else { /* Here a copy is required */ 493 if (roworiented) { 494 value = v + i*(stepval+bs)*bs + j*bs; 495 } else { 496 value = v + j*(stepval+bs)*bs + i*bs; 497 } 498 for (ii=0; ii<bs; ii++,value+=stepval) { 499 for (jj=0; jj<bs; jj++) { 500 *barray++ = *value++; 501 } 502 } 503 barray -=bs2; 504 } 505 506 if (in[j] >= cstart && in[j] < cend){ 507 col = in[j] - cstart; 508 ierr = MatSetValuesBlocked_SeqSBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 509 } 510 else if (in[j] < 0) continue; 511 #if defined(PETSC_USE_BOPT_g) 512 else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large, col %d max %d",in[j],baij->Nbs);} 513 #endif 514 else { 515 if (mat->was_assembled) { 516 if (!baij->colmap) { 517 ierr = CreateColmap_MPISBAIJ_Private(mat);CHKERRQ(ierr); 518 } 519 520 #if defined(PETSC_USE_BOPT_g) 521 #if defined (PETSC_USE_CTABLE) 522 { int data; 523 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 524 if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap"); 525 } 526 #else 527 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap"); 528 #endif 529 #endif 530 #if defined (PETSC_USE_CTABLE) 531 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 532 col = (col - 1)/bs; 533 #else 534 col = (baij->colmap[in[j]] - 1)/bs; 535 #endif 536 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 537 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 538 col = in[j]; 539 } 540 } 541 else col = in[j]; 542 ierr = MatSetValuesBlocked_SeqSBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 543 } 544 } 545 } else { 546 if (!baij->donotstash) { 547 if (roworiented) { 548 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 549 } else { 550 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 551 } 552 } 553 } 554 } 555 PetscFunctionReturn(0); 556 } 557 558 #define HASH_KEY 0.6180339887 559 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp))) 560 /* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 561 /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 562 #undef __FUNC__ 563 #define __FUNC__ /*<a name="MatSetValues_MPISBAIJ_HT_MatScalar"></a>*/"MatSetValues_MPISBAIJ_HT_MatScalar" 564 int MatSetValues_MPISBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv) 565 { 566 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 567 int ierr,i,j,row,col; 568 int roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ; 569 int rend_orig=baij->rend_bs,Nbs=baij->Nbs; 570 int h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx; 571 PetscReal tmp; 572 MatScalar **HD = baij->hd,value; 573 #if defined(PETSC_USE_BOPT_g) 574 int total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 575 #endif 576 577 PetscFunctionBegin; 578 579 for (i=0; i<m; i++) { 580 #if defined(PETSC_USE_BOPT_g) 581 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 582 if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 583 #endif 584 row = im[i]; 585 if (row >= rstart_orig && row < rend_orig) { 586 for (j=0; j<n; j++) { 587 col = in[j]; 588 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 589 /* Look up into the Hash Table */ 590 key = (row/bs)*Nbs+(col/bs)+1; 591 h1 = HASH(size,key,tmp); 592 593 594 idx = h1; 595 #if defined(PETSC_USE_BOPT_g) 596 insert_ct++; 597 total_ct++; 598 if (HT[idx] != key) { 599 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 600 if (idx == size) { 601 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 602 if (idx == h1) { 603 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 604 } 605 } 606 } 607 #else 608 if (HT[idx] != key) { 609 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 610 if (idx == size) { 611 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 612 if (idx == h1) { 613 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 614 } 615 } 616 } 617 #endif 618 /* A HASH table entry is found, so insert the values at the correct address */ 619 if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 620 else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 621 } 622 } else { 623 if (!baij->donotstash) { 624 if (roworiented) { 625 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 626 } else { 627 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 628 } 629 } 630 } 631 } 632 #if defined(PETSC_USE_BOPT_g) 633 baij->ht_total_ct = total_ct; 634 baij->ht_insert_ct = insert_ct; 635 #endif 636 PetscFunctionReturn(0); 637 } 638 639 #undef __FUNC__ 640 #define __FUNC__ /*<a name=""></a>*/"MatSetValuesBlocked_MPISBAIJ_HT_MatScalar" 641 int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv) 642 { 643 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 644 int ierr,i,j,ii,jj,row,col; 645 int roworiented = baij->roworiented,rstart=baij->rstart ; 646 int rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2; 647 int h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 648 PetscReal tmp; 649 MatScalar **HD = baij->hd,*baij_a; 650 MatScalar *v_t,*value; 651 #if defined(PETSC_USE_BOPT_g) 652 int total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 653 #endif 654 655 PetscFunctionBegin; 656 657 if (roworiented) { 658 stepval = (n-1)*bs; 659 } else { 660 stepval = (m-1)*bs; 661 } 662 for (i=0; i<m; i++) { 663 #if defined(PETSC_USE_BOPT_g) 664 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 665 if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 666 #endif 667 row = im[i]; 668 v_t = v + i*bs2; 669 if (row >= rstart && row < rend) { 670 for (j=0; j<n; j++) { 671 col = in[j]; 672 673 /* Look up into the Hash Table */ 674 key = row*Nbs+col+1; 675 h1 = HASH(size,key,tmp); 676 677 idx = h1; 678 #if defined(PETSC_USE_BOPT_g) 679 total_ct++; 680 insert_ct++; 681 if (HT[idx] != key) { 682 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 683 if (idx == size) { 684 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 685 if (idx == h1) { 686 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 687 } 688 } 689 } 690 #else 691 if (HT[idx] != key) { 692 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 693 if (idx == size) { 694 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 695 if (idx == h1) { 696 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 697 } 698 } 699 } 700 #endif 701 baij_a = HD[idx]; 702 if (roworiented) { 703 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 704 /* value = v + (i*(stepval+bs)+j)*bs; */ 705 value = v_t; 706 v_t += bs; 707 if (addv == ADD_VALUES) { 708 for (ii=0; ii<bs; ii++,value+=stepval) { 709 for (jj=ii; jj<bs2; jj+=bs) { 710 baij_a[jj] += *value++; 711 } 712 } 713 } else { 714 for (ii=0; ii<bs; ii++,value+=stepval) { 715 for (jj=ii; jj<bs2; jj+=bs) { 716 baij_a[jj] = *value++; 717 } 718 } 719 } 720 } else { 721 value = v + j*(stepval+bs)*bs + i*bs; 722 if (addv == ADD_VALUES) { 723 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 724 for (jj=0; jj<bs; jj++) { 725 baij_a[jj] += *value++; 726 } 727 } 728 } else { 729 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 730 for (jj=0; jj<bs; jj++) { 731 baij_a[jj] = *value++; 732 } 733 } 734 } 735 } 736 } 737 } else { 738 if (!baij->donotstash) { 739 if (roworiented) { 740 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 741 } else { 742 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 743 } 744 } 745 } 746 } 747 #if defined(PETSC_USE_BOPT_g) 748 baij->ht_total_ct = total_ct; 749 baij->ht_insert_ct = insert_ct; 750 #endif 751 PetscFunctionReturn(0); 752 } 753 754 #undef __FUNC__ 755 #define __FUNC__ /*<a name=""></a>*/"MatGetValues_MPISBAIJ" 756 int MatGetValues_MPISBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 757 { 758 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 759 int bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs; 760 int bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data; 761 762 PetscFunctionBegin; 763 for (i=0; i<m; i++) { 764 if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 765 if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 766 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 767 row = idxm[i] - bsrstart; 768 for (j=0; j<n; j++) { 769 if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column"); 770 if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large"); 771 if (idxn[j] >= bscstart && idxn[j] < bscend){ 772 col = idxn[j] - bscstart; 773 ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 774 } else { 775 if (!baij->colmap) { 776 ierr = CreateColmap_MPISBAIJ_Private(mat);CHKERRQ(ierr); 777 } 778 #if defined (PETSC_USE_CTABLE) 779 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 780 data --; 781 #else 782 data = baij->colmap[idxn[j]/bs]-1; 783 #endif 784 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 785 else { 786 col = data + idxn[j]%bs; 787 ierr = MatGetValues_SeqSBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 788 } 789 } 790 } 791 } else { 792 SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported"); 793 } 794 } 795 PetscFunctionReturn(0); 796 } 797 798 #undef __FUNC__ 799 #define __FUNC__ /*<a name=""></a>*/"MatNorm_MPISBAIJ" 800 int MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm) 801 { 802 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 803 /* Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ*)baij->A->data; */ 804 /* Mat_SeqBAIJ *bmat = (Mat_SeqBAIJ*)baij->B->data; */ 805 int ierr; 806 PetscReal sum[2],*lnorm2; 807 808 PetscFunctionBegin; 809 if (baij->size == 1) { 810 ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr); 811 } else { 812 if (type == NORM_FROBENIUS) { 813 lnorm2 = (double*)PetscMalloc(2*sizeof(double));CHKPTRQ(lnorm2); 814 ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr); 815 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++; /* squar power of norm(A) */ 816 ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr); 817 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--; /* squar power of norm(B) */ 818 /* 819 ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr); 820 PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d], lnorm2=%g, %g\n",rank,lnorm2[0],lnorm2[1]); 821 */ 822 ierr = MPI_Allreduce(lnorm2,&sum,2,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 823 /* 824 PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d], sum=%g, %g\n",rank,sum[0],sum[1]); 825 PetscSynchronizedFlush(PETSC_COMM_WORLD); */ 826 827 *norm = sqrt(sum[0] + 2*sum[1]); 828 ierr = PetscFree(lnorm2);CHKERRQ(ierr); 829 } else { 830 SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet"); 831 } 832 } 833 PetscFunctionReturn(0); 834 } 835 836 /* 837 Creates the hash table, and sets the table 838 This table is created only once. 839 If new entried need to be added to the matrix 840 then the hash table has to be destroyed and 841 recreated. 842 */ 843 #undef __FUNC__ 844 #define __FUNC__ /*<a name=""></a>*/"MatCreateHashTable_MPISBAIJ_Private" 845 int MatCreateHashTable_MPISBAIJ_Private(Mat mat,PetscReal factor) 846 { 847 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 848 Mat A = baij->A,B=baij->B; 849 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data; 850 int i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 851 int size,bs2=baij->bs2,rstart=baij->rstart,ierr; 852 int cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs; 853 int *HT,key; 854 MatScalar **HD; 855 PetscReal tmp; 856 #if defined(PETSC_USE_BOPT_g) 857 int ct=0,max=0; 858 #endif 859 860 PetscFunctionBegin; 861 baij->ht_size=(int)(factor*nz); 862 size = baij->ht_size; 863 864 if (baij->ht) { 865 PetscFunctionReturn(0); 866 } 867 868 /* Allocate Memory for Hash Table */ 869 baij->hd = (MatScalar**)PetscMalloc((size)*(sizeof(int)+sizeof(MatScalar*))+1);CHKPTRQ(baij->hd); 870 baij->ht = (int*)(baij->hd + size); 871 HD = baij->hd; 872 HT = baij->ht; 873 874 875 ierr = PetscMemzero(HD,size*(sizeof(int)+sizeof(Scalar*)));CHKERRQ(ierr); 876 877 878 /* Loop Over A */ 879 for (i=0; i<a->mbs; i++) { 880 for (j=ai[i]; j<ai[i+1]; j++) { 881 row = i+rstart; 882 col = aj[j]+cstart; 883 884 key = row*Nbs + col + 1; 885 h1 = HASH(size,key,tmp); 886 for (k=0; k<size; k++){ 887 if (HT[(h1+k)%size] == 0.0) { 888 HT[(h1+k)%size] = key; 889 HD[(h1+k)%size] = a->a + j*bs2; 890 break; 891 #if defined(PETSC_USE_BOPT_g) 892 } else { 893 ct++; 894 #endif 895 } 896 } 897 #if defined(PETSC_USE_BOPT_g) 898 if (k> max) max = k; 899 #endif 900 } 901 } 902 /* Loop Over B */ 903 for (i=0; i<b->mbs; i++) { 904 for (j=bi[i]; j<bi[i+1]; j++) { 905 row = i+rstart; 906 col = garray[bj[j]]; 907 key = row*Nbs + col + 1; 908 h1 = HASH(size,key,tmp); 909 for (k=0; k<size; k++){ 910 if (HT[(h1+k)%size] == 0.0) { 911 HT[(h1+k)%size] = key; 912 HD[(h1+k)%size] = b->a + j*bs2; 913 break; 914 #if defined(PETSC_USE_BOPT_g) 915 } else { 916 ct++; 917 #endif 918 } 919 } 920 #if defined(PETSC_USE_BOPT_g) 921 if (k> max) max = k; 922 #endif 923 } 924 } 925 926 /* Print Summary */ 927 #if defined(PETSC_USE_BOPT_g) 928 for (i=0,j=0; i<size; i++) { 929 if (HT[i]) {j++;} 930 } 931 PLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n",(j== 0)? 0.0:((PetscReal)(ct+j))/j,max); 932 #endif 933 PetscFunctionReturn(0); 934 } 935 936 #undef __FUNC__ 937 #define __FUNC__ /*<a name=""></a>*/"MatAssemblyBegin_MPISBAIJ" 938 int MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode) 939 { 940 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 941 int ierr,nstash,reallocs; 942 InsertMode addv; 943 944 PetscFunctionBegin; 945 if (baij->donotstash) { 946 PetscFunctionReturn(0); 947 } 948 949 /* make sure all processors are either in INSERTMODE or ADDMODE */ 950 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 951 if (addv == (ADD_VALUES|INSERT_VALUES)) { 952 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added"); 953 } 954 mat->insertmode = addv; /* in case this processor had no cache */ 955 956 ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr); 957 ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr); 958 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 959 PLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Stash has %d entries,uses %d mallocs.\n",nstash,reallocs); 960 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 961 PLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 962 PetscFunctionReturn(0); 963 } 964 965 #undef __FUNC__ 966 #define __FUNC__ /*<a name=""></a>*/"MatAssemblyEnd_MPISBAIJ" 967 int MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode) 968 { 969 Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data; 970 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)baij->A->data; 971 Mat_SeqBAIJ *b=(Mat_SeqBAIJ*)baij->B->data; 972 int i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2; 973 int *row,*col,other_disassembled; 974 PetscTruth r1,r2,r3; 975 MatScalar *val; 976 InsertMode addv = mat->insertmode; 977 int rank; 978 979 PetscFunctionBegin; 980 /* remove 2 line below later */ 981 ierr = MPI_Comm_rank(PETSC_COMM_WORLD, &rank);CHKERRQ(ierr); 982 983 if (!baij->donotstash) { 984 while (1) { 985 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 986 /* 987 PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d]: in AssemblyEnd, stash, flg=%d\n",rank,flg); 988 PetscSynchronizedFlush(PETSC_COMM_WORLD); 989 */ 990 if (!flg) break; 991 992 for (i=0; i<n;) { 993 /* Now identify the consecutive vals belonging to the same row */ 994 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 995 if (j < n) ncols = j-i; 996 else ncols = n-i; 997 /* Now assemble all these values with a single function call */ 998 ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 999 i = j; 1000 } 1001 } 1002 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 1003 /* Now process the block-stash. Since the values are stashed column-oriented, 1004 set the roworiented flag to column oriented, and after MatSetValues() 1005 restore the original flags */ 1006 r1 = baij->roworiented; 1007 r2 = a->roworiented; 1008 r3 = b->roworiented; 1009 baij->roworiented = PETSC_FALSE; 1010 a->roworiented = PETSC_FALSE; 1011 b->roworiented = PETSC_FALSE; 1012 while (1) { 1013 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 1014 if (!flg) break; 1015 1016 for (i=0; i<n;) { 1017 /* Now identify the consecutive vals belonging to the same row */ 1018 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 1019 if (j < n) ncols = j-i; 1020 else ncols = n-i; 1021 ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 1022 i = j; 1023 } 1024 } 1025 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 1026 baij->roworiented = r1; 1027 a->roworiented = r2; 1028 b->roworiented = r3; 1029 } 1030 1031 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 1032 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 1033 1034 /* determine if any processor has disassembled, if so we must 1035 also disassemble ourselfs, in order that we may reassemble. */ 1036 /* 1037 if nonzero structure of submatrix B cannot change then we know that 1038 no processor disassembled thus we can skip this stuff 1039 */ 1040 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 1041 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 1042 if (mat->was_assembled && !other_disassembled) { 1043 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 1044 } 1045 } 1046 1047 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 1048 ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); 1049 } 1050 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 1051 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 1052 1053 #if defined(PETSC_USE_BOPT_g) 1054 if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 1055 PLogInfo(0,"MatAssemblyEnd_MPISBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((double)baij->ht_total_ct)/baij->ht_insert_ct); 1056 baij->ht_total_ct = 0; 1057 baij->ht_insert_ct = 0; 1058 } 1059 #endif 1060 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 1061 ierr = MatCreateHashTable_MPISBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr); 1062 mat->ops->setvalues = MatSetValues_MPISBAIJ_HT; 1063 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPISBAIJ_HT; 1064 } 1065 1066 if (baij->rowvalues) { 1067 ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr); 1068 baij->rowvalues = 0; 1069 } 1070 PetscFunctionReturn(0); 1071 } 1072 1073 #undef __FUNC__ 1074 #define __FUNC__ /*<a name=""></a>*/"MatView_MPISBAIJ_ASCIIorDraworSocket" 1075 static int MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer) 1076 { 1077 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1078 int ierr,format,bs = baij->bs,size = baij->size,rank = baij->rank; 1079 PetscTruth isascii,isdraw; 1080 Viewer sviewer; 1081 1082 PetscFunctionBegin; 1083 ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr); 1084 ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr); 1085 if (isascii) { 1086 ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1087 if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 1088 MatInfo info; 1089 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 1090 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1091 ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n", 1092 rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs, 1093 baij->bs,(int)info.memory);CHKERRQ(ierr); 1094 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1095 ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr); 1096 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1097 ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr); 1098 ierr = ViewerFlush(viewer);CHKERRQ(ierr); 1099 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 1100 PetscFunctionReturn(0); 1101 } else if (format == VIEWER_FORMAT_ASCII_INFO) { 1102 ierr = ViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 1103 PetscFunctionReturn(0); 1104 } 1105 } 1106 1107 if (isdraw) { 1108 Draw draw; 1109 PetscTruth isnull; 1110 ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1111 ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1112 } 1113 1114 if (size == 1) { 1115 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 1116 } else { 1117 /* assemble the entire matrix onto first processor. */ 1118 Mat A; 1119 Mat_SeqSBAIJ *Aloc; 1120 Mat_SeqBAIJ *Bloc; 1121 int M = baij->M,N = baij->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 1122 MatScalar *a; 1123 1124 if (!rank) { 1125 ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 1126 } else { 1127 ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 1128 } 1129 PLogObjectParent(mat,A); 1130 1131 /* copy over the A part */ 1132 Aloc = (Mat_SeqSBAIJ*)baij->A->data; 1133 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1134 rvals = (int*)PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals); 1135 1136 for (i=0; i<mbs; i++) { 1137 rvals[0] = bs*(baij->rstart + i); 1138 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1139 for (j=ai[i]; j<ai[i+1]; j++) { 1140 col = (baij->cstart+aj[j])*bs; 1141 for (k=0; k<bs; k++) { 1142 ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1143 col++; a += bs; 1144 } 1145 } 1146 } 1147 /* copy over the B part */ 1148 Bloc = (Mat_SeqBAIJ*)baij->B->data; 1149 ai = Bloc->i; aj = Bloc->j; a = Bloc->a; 1150 for (i=0; i<mbs; i++) { 1151 rvals[0] = bs*(baij->rstart + i); 1152 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1153 for (j=ai[i]; j<ai[i+1]; j++) { 1154 col = baij->garray[aj[j]]*bs; 1155 for (k=0; k<bs; k++) { 1156 ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1157 col++; a += bs; 1158 } 1159 } 1160 } 1161 ierr = PetscFree(rvals);CHKERRQ(ierr); 1162 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1163 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1164 /* 1165 Everyone has to call to draw the matrix since the graphics waits are 1166 synchronized across all processors that share the Draw object 1167 */ 1168 ierr = ViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1169 if (!rank) { 1170 ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1171 } 1172 ierr = ViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1173 ierr = MatDestroy(A);CHKERRQ(ierr); 1174 } 1175 PetscFunctionReturn(0); 1176 } 1177 1178 #undef __FUNC__ 1179 #define __FUNC__ /*<a name=""></a>*/"MatView_MPISBAIJ" 1180 int MatView_MPISBAIJ(Mat mat,Viewer viewer) 1181 { 1182 int ierr; 1183 PetscTruth isascii,isdraw,issocket,isbinary; 1184 1185 PetscFunctionBegin; 1186 ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr); 1187 ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr); 1188 ierr = PetscTypeCompare((PetscObject)viewer,SOCKET_VIEWER,&issocket);CHKERRQ(ierr); 1189 ierr = PetscTypeCompare((PetscObject)viewer,BINARY_VIEWER,&isbinary);CHKERRQ(ierr); 1190 if (isascii || isdraw || issocket || isbinary) { 1191 ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1192 } else { 1193 SETERRQ1(1,1,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name); 1194 } 1195 PetscFunctionReturn(0); 1196 } 1197 1198 #undef __FUNC__ 1199 #define __FUNC__ /*<a name=""></a>*/"MatDestroy_MPISBAIJ" 1200 int MatDestroy_MPISBAIJ(Mat mat) 1201 { 1202 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1203 int ierr; 1204 1205 PetscFunctionBegin; 1206 1207 if (mat->mapping) { 1208 ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr); 1209 } 1210 if (mat->bmapping) { 1211 ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr); 1212 } 1213 if (mat->rmap) { 1214 ierr = MapDestroy(mat->rmap);CHKERRQ(ierr); 1215 } 1216 if (mat->cmap) { 1217 ierr = MapDestroy(mat->cmap);CHKERRQ(ierr); 1218 } 1219 #if defined(PETSC_USE_LOG) 1220 PLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",baij->M,baij->N); 1221 #endif 1222 1223 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1224 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 1225 1226 ierr = PetscFree(baij->rowners);CHKERRQ(ierr); 1227 ierr = MatDestroy(baij->A);CHKERRQ(ierr); 1228 ierr = MatDestroy(baij->B);CHKERRQ(ierr); 1229 #if defined (PETSC_USE_CTABLE) 1230 if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);} 1231 #else 1232 if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);} 1233 #endif 1234 if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);} 1235 if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 1236 if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 1237 if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);} 1238 if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);} 1239 if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);} 1240 #if defined(PETSC_USE_MAT_SINGLE) 1241 if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);} 1242 #endif 1243 ierr = PetscFree(baij);CHKERRQ(ierr); 1244 PLogObjectDestroy(mat); 1245 PetscHeaderDestroy(mat); 1246 PetscFunctionReturn(0); 1247 } 1248 1249 #undef __FUNC__ 1250 #define __FUNC__ /*<a name=""></a>*/"MatMult_MPISBAIJ" 1251 int MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy) 1252 { 1253 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1254 int ierr,nt; 1255 1256 PetscFunctionBegin; 1257 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1258 if (nt != a->n) { 1259 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx"); 1260 } 1261 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1262 if (nt != a->m) { 1263 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible parition of A and yy"); 1264 } 1265 1266 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1267 /* do diagonal part */ 1268 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1269 /* do supperdiagonal part */ 1270 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1271 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1272 /* do subdiagonal part */ 1273 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1274 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1275 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1276 #ifdef old /* working code */ 1277 /* do diagonal part */ 1278 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1279 1280 /* do supperdiagonal part */ 1281 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1282 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1283 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1284 1285 /* do subdiagonal part */ 1286 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1287 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1288 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1289 #endif 1290 PetscFunctionReturn(0); 1291 } 1292 1293 #undef __FUNC__ 1294 #define __FUNC__ /*<a name=""></a>*/"MatMultAdd_MPISBAIJ" 1295 int MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1296 { 1297 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1298 int ierr; 1299 1300 PetscFunctionBegin; 1301 1302 PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d], calledMatMultAdd_MPISBAIJ \n",rank); 1303 PetscSynchronizedFlush(PETSC_COMM_WORLD); 1304 1305 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1306 /* do diagonal part */ 1307 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1308 /* do supperdiagonal part */ 1309 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1310 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,zz);CHKERRQ(ierr); 1311 /* do subdiagonal part */ 1312 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1313 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1314 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1315 #ifdef old 1316 /* do subdiagonal part */ 1317 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1318 /* send it on its way */ 1319 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1320 /* do local part */ 1321 ierr = (*a->B->ops->multadd)(a->B,xx,yy,zz);CHKERRQ(ierr); 1322 ierr = (*a->A->ops->multadd)(a->A,xx,zz,zz);CHKERRQ(ierr); 1323 /* receive remote parts */ 1324 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1325 #endif 1326 PetscFunctionReturn(0); 1327 } 1328 1329 #undef __FUNC__ 1330 #define __FUNC__ /*<a name=""></a>*/"MatMultTranspose_MPISBAIJ" 1331 int MatMultTranspose_MPISBAIJ(Mat A,Vec xx,Vec yy) 1332 { 1333 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1334 int ierr; 1335 1336 PetscFunctionBegin; 1337 /* do nondiagonal part */ 1338 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1339 /* send it on its way */ 1340 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1341 /* do local part */ 1342 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1343 /* receive remote parts: note this assumes the values are not actually */ 1344 /* inserted in yy until the next line, which is true for my implementation*/ 1345 /* but is not perhaps always true. */ 1346 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1347 PetscFunctionReturn(0); 1348 } 1349 1350 #undef __FUNC__ 1351 #define __FUNC__ /*<a name=""></a>*/"MatMultTransposeAdd_MPISBAIJ" 1352 int MatMultTransposeAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1353 { 1354 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1355 int ierr; 1356 1357 PetscFunctionBegin; 1358 /* do nondiagonal part */ 1359 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1360 /* send it on its way */ 1361 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1362 /* do local part */ 1363 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1364 /* receive remote parts: note this assumes the values are not actually */ 1365 /* inserted in yy until the next line, which is true for my implementation*/ 1366 /* but is not perhaps always true. */ 1367 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1368 PetscFunctionReturn(0); 1369 } 1370 1371 /* 1372 This only works correctly for square matrices where the subblock A->A is the 1373 diagonal block 1374 */ 1375 #undef __FUNC__ 1376 #define __FUNC__ /*<a name=""></a>*/"MatGetDiagonal_MPISBAIJ" 1377 int MatGetDiagonal_MPISBAIJ(Mat A,Vec v) 1378 { 1379 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1380 int ierr; 1381 1382 PetscFunctionBegin; 1383 /* if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block"); */ 1384 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1385 PetscFunctionReturn(0); 1386 } 1387 1388 #undef __FUNC__ 1389 #define __FUNC__ /*<a name=""></a>*/"MatScale_MPISBAIJ" 1390 int MatScale_MPISBAIJ(Scalar *aa,Mat A) 1391 { 1392 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1393 int ierr; 1394 1395 PetscFunctionBegin; 1396 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 1397 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 1398 PetscFunctionReturn(0); 1399 } 1400 1401 #undef __FUNC__ 1402 #define __FUNC__ /*<a name=""></a>*/"MatGetSize_MPISBAIJ" 1403 int MatGetSize_MPISBAIJ(Mat matin,int *m,int *n) 1404 { 1405 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 1406 1407 PetscFunctionBegin; 1408 if (m) *m = mat->M; 1409 if (n) *n = mat->N; 1410 PetscFunctionReturn(0); 1411 } 1412 1413 #undef __FUNC__ 1414 #define __FUNC__ /*<a name=""></a>*/"MatGetLocalSize_MPISBAIJ" 1415 int MatGetLocalSize_MPISBAIJ(Mat matin,int *m,int *n) 1416 { 1417 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 1418 1419 PetscFunctionBegin; 1420 *m = mat->m; *n = mat->n; 1421 PetscFunctionReturn(0); 1422 } 1423 1424 #undef __FUNC__ 1425 #define __FUNC__ /*<a name=""></a>*/"MatGetOwnershipRange_MPISBAIJ" 1426 int MatGetOwnershipRange_MPISBAIJ(Mat matin,int *m,int *n) 1427 { 1428 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 1429 1430 PetscFunctionBegin; 1431 if (m) *m = mat->rstart*mat->bs; 1432 if (n) *n = mat->rend*mat->bs; 1433 PetscFunctionReturn(0); 1434 } 1435 1436 #undef __FUNC__ 1437 #define __FUNC__ /*<a name=""></a>*/"MatGetRow_MPISBAIJ" 1438 int MatGetRow_MPISBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 1439 { 1440 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 1441 Scalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1442 int bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB; 1443 int nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs; 1444 int *cmap,*idx_p,cstart = mat->cstart; 1445 1446 PetscFunctionBegin; 1447 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active"); 1448 mat->getrowactive = PETSC_TRUE; 1449 1450 if (!mat->rowvalues && (idx || v)) { 1451 /* 1452 allocate enough space to hold information from the longest row. 1453 */ 1454 Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data; 1455 Mat_SeqBAIJ *Ba = (Mat_SeqBAIJ*)mat->B->data; 1456 int max = 1,mbs = mat->mbs,tmp; 1457 for (i=0; i<mbs; i++) { 1458 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */ 1459 if (max < tmp) { max = tmp; } 1460 } 1461 mat->rowvalues = (Scalar*)PetscMalloc(max*bs2*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues); 1462 mat->rowindices = (int*)(mat->rowvalues + max*bs2); 1463 } 1464 1465 if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,0,"Only local rows") 1466 lrow = row - brstart; /* local row index */ 1467 1468 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1469 if (!v) {pvA = 0; pvB = 0;} 1470 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1471 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1472 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1473 nztot = nzA + nzB; 1474 1475 cmap = mat->garray; 1476 if (v || idx) { 1477 if (nztot) { 1478 /* Sort by increasing column numbers, assuming A and B already sorted */ 1479 int imark = -1; 1480 if (v) { 1481 *v = v_p = mat->rowvalues; 1482 for (i=0; i<nzB; i++) { 1483 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1484 else break; 1485 } 1486 imark = i; 1487 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1488 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1489 } 1490 if (idx) { 1491 *idx = idx_p = mat->rowindices; 1492 if (imark > -1) { 1493 for (i=0; i<imark; i++) { 1494 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1495 } 1496 } else { 1497 for (i=0; i<nzB; i++) { 1498 if (cmap[cworkB[i]/bs] < cstart) 1499 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1500 else break; 1501 } 1502 imark = i; 1503 } 1504 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1505 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1506 } 1507 } else { 1508 if (idx) *idx = 0; 1509 if (v) *v = 0; 1510 } 1511 } 1512 *nz = nztot; 1513 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1514 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1515 PetscFunctionReturn(0); 1516 } 1517 1518 #undef __FUNC__ 1519 #define __FUNC__ /*<a name=""></a>*/"MatRestoreRow_MPISBAIJ" 1520 int MatRestoreRow_MPISBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1521 { 1522 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1523 1524 PetscFunctionBegin; 1525 if (baij->getrowactive == PETSC_FALSE) { 1526 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called"); 1527 } 1528 baij->getrowactive = PETSC_FALSE; 1529 PetscFunctionReturn(0); 1530 } 1531 1532 #undef __FUNC__ 1533 #define __FUNC__ /*<a name=""></a>*/"MatGetBlockSize_MPISBAIJ" 1534 int MatGetBlockSize_MPISBAIJ(Mat mat,int *bs) 1535 { 1536 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1537 1538 PetscFunctionBegin; 1539 *bs = baij->bs; 1540 PetscFunctionReturn(0); 1541 } 1542 1543 #undef __FUNC__ 1544 #define __FUNC__ /*<a name=""></a>*/"MatZeroEntries_MPISBAIJ" 1545 int MatZeroEntries_MPISBAIJ(Mat A) 1546 { 1547 Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data; 1548 int ierr; 1549 1550 PetscFunctionBegin; 1551 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1552 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1553 PetscFunctionReturn(0); 1554 } 1555 1556 #undef __FUNC__ 1557 #define __FUNC__ /*<a name=""></a>*/"MatGetInfo_MPISBAIJ" 1558 int MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1559 { 1560 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data; 1561 Mat A = a->A,B = a->B; 1562 int ierr; 1563 PetscReal isend[5],irecv[5]; 1564 1565 PetscFunctionBegin; 1566 info->block_size = (double)a->bs; 1567 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1568 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1569 isend[3] = info->memory; isend[4] = info->mallocs; 1570 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1571 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1572 isend[3] += info->memory; isend[4] += info->mallocs; 1573 if (flag == MAT_LOCAL) { 1574 info->nz_used = isend[0]; 1575 info->nz_allocated = isend[1]; 1576 info->nz_unneeded = isend[2]; 1577 info->memory = isend[3]; 1578 info->mallocs = isend[4]; 1579 } else if (flag == MAT_GLOBAL_MAX) { 1580 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr); 1581 info->nz_used = irecv[0]; 1582 info->nz_allocated = irecv[1]; 1583 info->nz_unneeded = irecv[2]; 1584 info->memory = irecv[3]; 1585 info->mallocs = irecv[4]; 1586 } else if (flag == MAT_GLOBAL_SUM) { 1587 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr); 1588 info->nz_used = irecv[0]; 1589 info->nz_allocated = irecv[1]; 1590 info->nz_unneeded = irecv[2]; 1591 info->memory = irecv[3]; 1592 info->mallocs = irecv[4]; 1593 } else { 1594 SETERRQ1(1,1,"Unknown MatInfoType argument %d",flag); 1595 } 1596 info->rows_global = (double)a->M; 1597 info->columns_global = (double)a->N; 1598 info->rows_local = (double)a->m; 1599 info->columns_local = (double)a->N; 1600 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1601 info->fill_ratio_needed = 0; 1602 info->factor_mallocs = 0; 1603 PetscFunctionReturn(0); 1604 } 1605 1606 #undef __FUNC__ 1607 #define __FUNC__ /*<a name=""></a>*/"MatSetOption_MPISBAIJ" 1608 int MatSetOption_MPISBAIJ(Mat A,MatOption op) 1609 { 1610 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1611 int ierr; 1612 1613 PetscFunctionBegin; 1614 if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 1615 op == MAT_YES_NEW_NONZERO_LOCATIONS || 1616 op == MAT_COLUMNS_UNSORTED || 1617 op == MAT_COLUMNS_SORTED || 1618 op == MAT_NEW_NONZERO_ALLOCATION_ERR || 1619 op == MAT_KEEP_ZEROED_ROWS || 1620 op == MAT_NEW_NONZERO_LOCATION_ERR) { 1621 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1622 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1623 } else if (op == MAT_ROW_ORIENTED) { 1624 a->roworiented = PETSC_TRUE; 1625 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1626 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1627 } else if (op == MAT_ROWS_SORTED || 1628 op == MAT_ROWS_UNSORTED || 1629 op == MAT_SYMMETRIC || 1630 op == MAT_STRUCTURALLY_SYMMETRIC || 1631 op == MAT_YES_NEW_DIAGONALS || 1632 op == MAT_USE_HASH_TABLE) { 1633 PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 1634 } else if (op == MAT_COLUMN_ORIENTED) { 1635 a->roworiented = PETSC_FALSE; 1636 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1637 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1638 } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 1639 a->donotstash = PETSC_TRUE; 1640 } else if (op == MAT_NO_NEW_DIAGONALS) { 1641 SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 1642 } else if (op == MAT_USE_HASH_TABLE) { 1643 a->ht_flag = PETSC_TRUE; 1644 } else { 1645 SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 1646 } 1647 PetscFunctionReturn(0); 1648 } 1649 1650 #undef __FUNC__ 1651 #define __FUNC__ /*<a name=""></a>*/"MatTranspose_MPISBAIJ(" 1652 int MatTranspose_MPISBAIJ(Mat A,Mat *matout) 1653 { 1654 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 1655 Mat_SeqBAIJ *Aloc; 1656 Mat B; 1657 int ierr,M=baij->M,N=baij->N,*ai,*aj,i,*rvals,j,k,col; 1658 int bs=baij->bs,mbs=baij->mbs; 1659 MatScalar *a; 1660 1661 PetscFunctionBegin; 1662 if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place"); 1663 ierr = MatCreateMPIBAIJ(A->comm,baij->bs,baij->n,baij->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr); 1664 1665 /* copy over the A part */ 1666 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1667 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1668 rvals = (int*)PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals); 1669 1670 for (i=0; i<mbs; i++) { 1671 rvals[0] = bs*(baij->rstart + i); 1672 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1673 for (j=ai[i]; j<ai[i+1]; j++) { 1674 col = (baij->cstart+aj[j])*bs; 1675 for (k=0; k<bs; k++) { 1676 ierr = MatSetValues_MPISBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1677 col++; a += bs; 1678 } 1679 } 1680 } 1681 /* copy over the B part */ 1682 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1683 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1684 for (i=0; i<mbs; i++) { 1685 rvals[0] = bs*(baij->rstart + i); 1686 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1687 for (j=ai[i]; j<ai[i+1]; j++) { 1688 col = baij->garray[aj[j]]*bs; 1689 for (k=0; k<bs; k++) { 1690 ierr = MatSetValues_MPISBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1691 col++; a += bs; 1692 } 1693 } 1694 } 1695 ierr = PetscFree(rvals);CHKERRQ(ierr); 1696 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1697 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1698 1699 if (matout) { 1700 *matout = B; 1701 } else { 1702 PetscOps *Abops; 1703 MatOps Aops; 1704 1705 /* This isn't really an in-place transpose .... but free data structures from baij */ 1706 ierr = PetscFree(baij->rowners);CHKERRQ(ierr); 1707 ierr = MatDestroy(baij->A);CHKERRQ(ierr); 1708 ierr = MatDestroy(baij->B);CHKERRQ(ierr); 1709 #if defined (PETSC_USE_CTABLE) 1710 if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);} 1711 #else 1712 if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);} 1713 #endif 1714 if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);} 1715 if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 1716 if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 1717 ierr = PetscFree(baij);CHKERRQ(ierr); 1718 1719 /* 1720 This is horrible, horrible code. We need to keep the 1721 A pointers for the bops and ops but copy everything 1722 else from C. 1723 */ 1724 Abops = A->bops; 1725 Aops = A->ops; 1726 ierr = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr); 1727 A->bops = Abops; 1728 A->ops = Aops; 1729 1730 PetscHeaderDestroy(B); 1731 } 1732 PetscFunctionReturn(0); 1733 } 1734 1735 #undef __FUNC__ 1736 #define __FUNC__ /*<a name=""></a>*/"MatDiagonalScale_MPISBAIJ" 1737 int MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr) 1738 { 1739 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1740 Mat a = baij->A,b = baij->B; 1741 int ierr,s1,s2,s3; 1742 1743 PetscFunctionBegin; 1744 if (ll != rr) { 1745 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"For symmetric format, left and right scaling vectors must be same\n"); 1746 } 1747 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1748 if (rr) { 1749 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1750 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size"); 1751 /* Overlap communication with computation. */ 1752 ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1753 /*} if (ll) { */ 1754 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1755 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size"); 1756 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1757 /* } */ 1758 /* scale the diagonal block */ 1759 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1760 1761 /* if (rr) { */ 1762 /* Do a scatter end and then right scale the off-diagonal block */ 1763 ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1764 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1765 } 1766 1767 PetscFunctionReturn(0); 1768 } 1769 1770 #undef __FUNC__ 1771 #define __FUNC__ /*<a name=""></a>*/"MatZeroRows_MPISBAIJ" 1772 int MatZeroRows_MPISBAIJ(Mat A,IS is,Scalar *diag) 1773 { 1774 Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data; 1775 int i,ierr,N,*rows,*owners = l->rowners,size = l->size; 1776 int *procs,*nprocs,j,found,idx,nsends,*work,row; 1777 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 1778 int *rvalues,tag = A->tag,count,base,slen,n,*source; 1779 int *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs; 1780 MPI_Comm comm = A->comm; 1781 MPI_Request *send_waits,*recv_waits; 1782 MPI_Status recv_status,*send_status; 1783 IS istmp; 1784 1785 PetscFunctionBegin; 1786 ierr = ISGetSize(is,&N);CHKERRQ(ierr); 1787 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 1788 1789 /* first count number of contributors to each processor */ 1790 nprocs = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(nprocs); 1791 ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 1792 procs = nprocs + size; 1793 owner = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/ 1794 for (i=0; i<N; i++) { 1795 idx = rows[i]; 1796 found = 0; 1797 for (j=0; j<size; j++) { 1798 if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 1799 nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 1800 } 1801 } 1802 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range"); 1803 } 1804 nsends = 0; for (i=0; i<size; i++) { nsends += procs[i];} 1805 1806 /* inform other processors of number of messages and max length*/ 1807 work = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(work); 1808 ierr = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr); 1809 nmax = work[rank]; 1810 nrecvs = work[size+rank]; 1811 ierr = PetscFree(work);CHKERRQ(ierr); 1812 1813 /* post receives: */ 1814 rvalues = (int*)PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues); 1815 recv_waits = (MPI_Request*)PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 1816 for (i=0; i<nrecvs; i++) { 1817 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 1818 } 1819 1820 /* do sends: 1821 1) starts[i] gives the starting index in svalues for stuff going to 1822 the ith processor 1823 */ 1824 svalues = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(svalues); 1825 send_waits = (MPI_Request*)PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 1826 starts = (int*)PetscMalloc((size+1)*sizeof(int));CHKPTRQ(starts); 1827 starts[0] = 0; 1828 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];} 1829 for (i=0; i<N; i++) { 1830 svalues[starts[owner[i]]++] = rows[i]; 1831 } 1832 ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr); 1833 1834 starts[0] = 0; 1835 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];} 1836 count = 0; 1837 for (i=0; i<size; i++) { 1838 if (procs[i]) { 1839 ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1840 } 1841 } 1842 ierr = PetscFree(starts);CHKERRQ(ierr); 1843 1844 base = owners[rank]*bs; 1845 1846 /* wait on receives */ 1847 lens = (int*)PetscMalloc(2*(nrecvs+1)*sizeof(int));CHKPTRQ(lens); 1848 source = lens + nrecvs; 1849 count = nrecvs; slen = 0; 1850 while (count) { 1851 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1852 /* unpack receives into our local space */ 1853 ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 1854 source[imdex] = recv_status.MPI_SOURCE; 1855 lens[imdex] = n; 1856 slen += n; 1857 count--; 1858 } 1859 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1860 1861 /* move the data into the send scatter */ 1862 lrows = (int*)PetscMalloc((slen+1)*sizeof(int));CHKPTRQ(lrows); 1863 count = 0; 1864 for (i=0; i<nrecvs; i++) { 1865 values = rvalues + i*nmax; 1866 for (j=0; j<lens[i]; j++) { 1867 lrows[count++] = values[j] - base; 1868 } 1869 } 1870 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1871 ierr = PetscFree(lens);CHKERRQ(ierr); 1872 ierr = PetscFree(owner);CHKERRQ(ierr); 1873 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1874 1875 /* actually zap the local rows */ 1876 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 1877 PLogObjectParent(A,istmp); 1878 1879 /* 1880 Zero the required rows. If the "diagonal block" of the matrix 1881 is square and the user wishes to set the diagonal we use seperate 1882 code so that MatSetValues() is not called for each diagonal allocating 1883 new memory, thus calling lots of mallocs and slowing things down. 1884 1885 Contributed by: Mathew Knepley 1886 */ 1887 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1888 ierr = MatZeroRows_SeqSBAIJ(l->B,istmp,0);CHKERRQ(ierr); 1889 if (diag && (l->A->M == l->A->N)) { 1890 ierr = MatZeroRows_SeqSBAIJ(l->A,istmp,diag);CHKERRQ(ierr); 1891 } else if (diag) { 1892 ierr = MatZeroRows_SeqSBAIJ(l->A,istmp,0);CHKERRQ(ierr); 1893 if (((Mat_SeqSBAIJ*)l->A->data)->nonew) { 1894 SETERRQ(PETSC_ERR_SUP,0,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1895 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1896 } 1897 for (i=0; i<slen; i++) { 1898 row = lrows[i] + rstart_bs; 1899 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 1900 } 1901 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1902 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1903 } else { 1904 ierr = MatZeroRows_SeqSBAIJ(l->A,istmp,0);CHKERRQ(ierr); 1905 } 1906 1907 ierr = ISDestroy(istmp);CHKERRQ(ierr); 1908 ierr = PetscFree(lrows);CHKERRQ(ierr); 1909 1910 /* wait on sends */ 1911 if (nsends) { 1912 send_status = (MPI_Status*)PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 1913 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1914 ierr = PetscFree(send_status);CHKERRQ(ierr); 1915 } 1916 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1917 ierr = PetscFree(svalues);CHKERRQ(ierr); 1918 1919 PetscFunctionReturn(0); 1920 } 1921 1922 #undef __FUNC__ 1923 #define __FUNC__ /*<a name=""></a>*/"MatPrintHelp_MPISBAIJ" 1924 int MatPrintHelp_MPISBAIJ(Mat A) 1925 { 1926 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1927 MPI_Comm comm = A->comm; 1928 static int called = 0; 1929 int ierr; 1930 1931 PetscFunctionBegin; 1932 if (!a->rank) { 1933 ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr); 1934 } 1935 if (called) {PetscFunctionReturn(0);} else called = 1; 1936 ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr); 1937 ierr = (*PetscHelpPrintf)(comm," -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr); 1938 PetscFunctionReturn(0); 1939 } 1940 1941 #undef __FUNC__ 1942 #define __FUNC__ /*<a name=""></a>*/"MatSetUnfactored_MPISBAIJ" 1943 int MatSetUnfactored_MPISBAIJ(Mat A) 1944 { 1945 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1946 int ierr; 1947 1948 PetscFunctionBegin; 1949 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1950 PetscFunctionReturn(0); 1951 } 1952 1953 static int MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *); 1954 1955 #undef __FUNC__ 1956 #define __FUNC__ /*<a name=""></a>*/"MatEqual_MPISBAIJ" 1957 int MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag) 1958 { 1959 Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data; 1960 Mat a,b,c,d; 1961 PetscTruth flg; 1962 int ierr; 1963 1964 PetscFunctionBegin; 1965 if (B->type != MATMPISBAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type"); 1966 a = matA->A; b = matA->B; 1967 c = matB->A; d = matB->B; 1968 1969 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1970 if (flg == PETSC_TRUE) { 1971 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1972 } 1973 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1974 PetscFunctionReturn(0); 1975 } 1976 1977 /* -------------------------------------------------------------------*/ 1978 static struct _MatOps MatOps_Values = { 1979 MatSetValues_MPISBAIJ, 1980 MatGetRow_MPISBAIJ, 1981 MatRestoreRow_MPISBAIJ, 1982 MatMult_MPISBAIJ, 1983 MatMultAdd_MPISBAIJ, 1984 MatMultTranspose_MPISBAIJ, 1985 MatMultTransposeAdd_MPISBAIJ, 1986 0, 1987 0, 1988 0, 1989 0, 1990 0, 1991 0, 1992 0, 1993 MatTranspose_MPISBAIJ, 1994 MatGetInfo_MPISBAIJ, 1995 MatEqual_MPISBAIJ, 1996 MatGetDiagonal_MPISBAIJ, 1997 MatDiagonalScale_MPISBAIJ, 1998 MatNorm_MPISBAIJ, 1999 MatAssemblyBegin_MPISBAIJ, 2000 MatAssemblyEnd_MPISBAIJ, 2001 0, 2002 MatSetOption_MPISBAIJ, 2003 MatZeroEntries_MPISBAIJ, 2004 MatZeroRows_MPISBAIJ, 2005 0, 2006 0, 2007 0, 2008 0, 2009 MatGetSize_MPISBAIJ, 2010 MatGetLocalSize_MPISBAIJ, 2011 MatGetOwnershipRange_MPISBAIJ, 2012 0, 2013 0, 2014 0, 2015 0, 2016 MatDuplicate_MPISBAIJ, 2017 0, 2018 0, 2019 0, 2020 0, 2021 0, 2022 MatGetSubMatrices_MPISBAIJ, 2023 MatIncreaseOverlap_MPISBAIJ, 2024 MatGetValues_MPISBAIJ, 2025 0, 2026 MatPrintHelp_MPISBAIJ, 2027 MatScale_MPISBAIJ, 2028 0, 2029 0, 2030 0, 2031 MatGetBlockSize_MPISBAIJ, 2032 0, 2033 0, 2034 0, 2035 0, 2036 0, 2037 0, 2038 MatSetUnfactored_MPISBAIJ, 2039 0, 2040 MatSetValuesBlocked_MPISBAIJ, 2041 0, 2042 0, 2043 0, 2044 MatGetMaps_Petsc}; 2045 2046 2047 EXTERN_C_BEGIN 2048 #undef __FUNC__ 2049 #define __FUNC__ /*<a name=""></a>*/"MatGetDiagonalBlock_MPISBAIJ" 2050 int MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 2051 { 2052 PetscFunctionBegin; 2053 *a = ((Mat_MPISBAIJ *)A->data)->A; 2054 *iscopy = PETSC_FALSE; 2055 PetscFunctionReturn(0); 2056 } 2057 EXTERN_C_END 2058 2059 #undef __FUNC__ 2060 #define __FUNC__ /*<a name=""></a>*/"MatCreateMPISBAIJ" 2061 /*@C 2062 MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format 2063 (block compressed row). For good matrix assembly performance 2064 the user should preallocate the matrix storage by setting the parameters 2065 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2066 performance can be increased by more than a factor of 50. 2067 2068 Collective on MPI_Comm 2069 2070 Input Parameters: 2071 + comm - MPI communicator 2072 . bs - size of blockk 2073 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2074 This value should be the same as the local size used in creating the 2075 y vector for the matrix-vector product y = Ax. 2076 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 2077 This value should be the same as the local size used in creating the 2078 x vector for the matrix-vector product y = Ax. 2079 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2080 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2081 . d_nz - number of block nonzeros per block row in diagonal portion of local 2082 submatrix (same for all local rows) 2083 . d_nnz - array containing the number of block nonzeros in the various block rows 2084 of the in diagonal portion of the local (possibly different for each block 2085 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 2086 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2087 submatrix (same for all local rows). 2088 - o_nnz - array containing the number of nonzeros in the various block rows of the 2089 off-diagonal portion of the local submatrix (possibly different for 2090 each block row) or PETSC_NULL. 2091 2092 Output Parameter: 2093 . A - the matrix 2094 2095 Options Database Keys: 2096 . -mat_no_unroll - uses code that does not unroll the loops in the 2097 block calculations (much slower) 2098 . -mat_block_size - size of the blocks to use 2099 . -mat_mpi - use the parallel matrix data structures even on one processor 2100 (defaults to using SeqBAIJ format on one processor) 2101 2102 Notes: 2103 The user MUST specify either the local or global matrix dimensions 2104 (possibly both). 2105 2106 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2107 than it must be used on all processors that share the object for that argument. 2108 2109 Storage Information: 2110 For a square global matrix we define each processor's diagonal portion 2111 to be its local rows and the corresponding columns (a square submatrix); 2112 each processor's off-diagonal portion encompasses the remainder of the 2113 local matrix (a rectangular submatrix). 2114 2115 The user can specify preallocated storage for the diagonal part of 2116 the local submatrix with either d_nz or d_nnz (not both). Set 2117 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 2118 memory allocation. Likewise, specify preallocated storage for the 2119 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2120 2121 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2122 the figure below we depict these three local rows and all columns (0-11). 2123 2124 .vb 2125 0 1 2 3 4 5 6 7 8 9 10 11 2126 ------------------- 2127 row 3 | o o o d d d o o o o o o 2128 row 4 | o o o d d d o o o o o o 2129 row 5 | o o o d d d o o o o o o 2130 ------------------- 2131 .ve 2132 2133 Thus, any entries in the d locations are stored in the d (diagonal) 2134 submatrix, and any entries in the o locations are stored in the 2135 o (off-diagonal) submatrix. Note that the d and the o submatrices are 2136 stored simply in the MATSEQBAIJ format for compressed row storage. 2137 2138 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 2139 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2140 In general, for PDE problems in which most nonzeros are near the diagonal, 2141 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2142 or you will get TERRIBLE performance; see the users' manual chapter on 2143 matrices. 2144 2145 Level: intermediate 2146 2147 .keywords: matrix, block, aij, compressed row, sparse, parallel 2148 2149 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPISBAIJ() 2150 @*/ 2151 2152 int MatCreateMPISBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 2153 { 2154 Mat B; 2155 Mat_MPISBAIJ *b; 2156 int ierr,i,sum[1],work[1],mbs,Mbs=PETSC_DECIDE,size; 2157 PetscTruth flag1,flag2,flg; 2158 2159 PetscFunctionBegin; 2160 if (M != N || m != n){ /* N and n are not used after this */ 2161 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"For symmetric format, set M=N and m=n"); 2162 } 2163 ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2164 2165 if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid block size specified, must be positive"); 2166 if (d_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nz cannot be less than -2: value %d",d_nz); 2167 if (o_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nz cannot be less than -2: value %d",o_nz); 2168 if (d_nnz) { 2169 for (i=0; i<m/bs; i++) { 2170 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]); 2171 } 2172 } 2173 if (o_nnz) { 2174 for (i=0; i<m/bs; i++) { 2175 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]); 2176 } 2177 } 2178 2179 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2180 ierr = OptionsHasName(PETSC_NULL,"-mat_mpisbaij",&flag1);CHKERRQ(ierr); 2181 ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2);CHKERRQ(ierr); 2182 if (!flag1 && !flag2 && size == 1) { 2183 if (M == PETSC_DECIDE) M = m; 2184 ierr = MatCreateSeqSBAIJ(comm,bs,M,M,d_nz,d_nnz,A);CHKERRQ(ierr); 2185 PetscFunctionReturn(0); 2186 } 2187 2188 *A = 0; 2189 PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPISBAIJ,"Mat",comm,MatDestroy,MatView); 2190 PLogObjectCreate(B); 2191 B->data = (void*)(b = PetscNew(Mat_MPISBAIJ));CHKPTRQ(b); 2192 ierr = PetscMemzero(b,sizeof(Mat_MPISBAIJ));CHKERRQ(ierr); 2193 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2194 2195 B->ops->destroy = MatDestroy_MPISBAIJ; 2196 B->ops->view = MatView_MPISBAIJ; 2197 B->mapping = 0; 2198 B->factor = 0; 2199 B->assembled = PETSC_FALSE; 2200 2201 B->insertmode = NOT_SET_VALUES; 2202 ierr = MPI_Comm_rank(comm,&b->rank);CHKERRQ(ierr); 2203 ierr = MPI_Comm_size(comm,&b->size);CHKERRQ(ierr); 2204 2205 if (m == PETSC_DECIDE && (d_nnz || o_nnz)) { 2206 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 2207 } 2208 if (M == PETSC_DECIDE && m == PETSC_DECIDE) { 2209 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either M or m should be specified"); 2210 } 2211 if (M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE; 2212 2213 if (M == PETSC_DECIDE) { 2214 work[0] = m; mbs = m/bs; 2215 ierr = MPI_Allreduce(work,sum,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 2216 M = sum[0]; Mbs = M/bs; 2217 } else { /* M is specified */ 2218 Mbs = M/bs; 2219 if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global rows must be divisible by blocksize"); 2220 mbs = Mbs/b->size + ((Mbs % b->size) > b->rank); 2221 m = mbs*bs; 2222 } 2223 2224 if (mbs*bs != m) { 2225 SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of local rows/cols must be divisible by blocksize"); 2226 } 2227 2228 b->m = m; B->m = m; 2229 b->n = m; B->n = m; 2230 b->N = M; B->N = M; 2231 b->M = M; 2232 B->M = M; 2233 b->bs = bs; 2234 b->bs2 = bs*bs; 2235 b->mbs = mbs; 2236 b->nbs = mbs; 2237 b->Mbs = Mbs; 2238 b->Nbs = Mbs; 2239 2240 /* the information in the maps duplicates the information computed below, eventually 2241 we should remove the duplicate information that is not contained in the maps */ 2242 ierr = MapCreateMPI(B->comm,m,M,&B->rmap);CHKERRQ(ierr); 2243 ierr = MapCreateMPI(B->comm,m,M,&B->cmap);CHKERRQ(ierr); 2244 2245 /* build local table of row and column ownerships */ 2246 b->rowners = (int*)PetscMalloc(3*(b->size+2)*sizeof(int));CHKPTRQ(b->rowners); 2247 PLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ)); 2248 b->cowners = b->rowners + b->size + 2; 2249 b->rowners_bs = b->cowners + b->size + 2; 2250 ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2251 b->rowners[0] = 0; 2252 for (i=2; i<=b->size; i++) { 2253 b->rowners[i] += b->rowners[i-1]; 2254 } 2255 for (i=0; i<=b->size; i++) { 2256 b->rowners_bs[i] = b->rowners[i]*bs; 2257 } 2258 b->rstart = b->rowners[b->rank]; 2259 b->rend = b->rowners[b->rank+1]; 2260 b->rstart_bs = b->rstart * bs; 2261 b->rend_bs = b->rend * bs; 2262 2263 b->cstart = b->rstart; 2264 b->cend = b->rend; 2265 b->cstart_bs = b->cstart * bs; 2266 b->cend_bs = b->cend * bs; 2267 2268 2269 if (d_nz == PETSC_DEFAULT) d_nz = 5; 2270 ierr = MatCreateSeqSBAIJ(PETSC_COMM_SELF,bs,m,m,d_nz,d_nnz,&b->A);CHKERRQ(ierr); 2271 PLogObjectParent(B,b->A); 2272 if (o_nz == PETSC_DEFAULT) o_nz = 0; 2273 ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,M,o_nz,o_nnz,&b->B);CHKERRQ(ierr); 2274 PLogObjectParent(B,b->B); 2275 2276 /* build cache for off array entries formed */ 2277 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 2278 ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr); 2279 b->donotstash = PETSC_FALSE; 2280 b->colmap = PETSC_NULL; 2281 b->garray = PETSC_NULL; 2282 b->roworiented = PETSC_TRUE; 2283 2284 #if defined(PEYSC_USE_MAT_SINGLE) 2285 /* stuff for MatSetValues_XXX in single precision */ 2286 b->lensetvalues = 0; 2287 b->setvaluescopy = PETSC_NULL; 2288 #endif 2289 2290 /* stuff used in block assembly */ 2291 b->barray = 0; 2292 2293 /* stuff used for matrix vector multiply */ 2294 b->lvec = 0; 2295 b->Mvctx = 0; 2296 2297 /* stuff for MatGetRow() */ 2298 b->rowindices = 0; 2299 b->rowvalues = 0; 2300 b->getrowactive = PETSC_FALSE; 2301 2302 /* hash table stuff */ 2303 b->ht = 0; 2304 b->hd = 0; 2305 b->ht_size = 0; 2306 b->ht_flag = PETSC_FALSE; 2307 b->ht_fact = 0; 2308 b->ht_total_ct = 0; 2309 b->ht_insert_ct = 0; 2310 2311 *A = B; 2312 ierr = OptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr); 2313 if (flg) { 2314 double fact = 1.39; 2315 ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr); 2316 ierr = OptionsGetDouble(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr); 2317 if (fact <= 1.0) fact = 1.39; 2318 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 2319 PLogInfo(0,"MatCreateMPISBAIJ:Hash table Factor used %5.2f\n",fact); 2320 } 2321 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 2322 "MatStoreValues_MPISBAIJ", 2323 MatStoreValues_MPISBAIJ);CHKERRQ(ierr); 2324 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 2325 "MatRetrieveValues_MPISBAIJ", 2326 MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr); 2327 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 2328 "MatGetDiagonalBlock_MPISBAIJ", 2329 MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr); 2330 PetscFunctionReturn(0); 2331 } 2332 2333 2334 #undef __FUNC__ 2335 #define __FUNC__ /*<a name=""></a>*/"MatDuplicate_MPISBAIJ" 2336 static int MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2337 { 2338 Mat mat; 2339 Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data; 2340 int ierr,len=0; 2341 PetscTruth flg; 2342 2343 PetscFunctionBegin; 2344 *newmat = 0; 2345 PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPISBAIJ,"Mat",matin->comm,MatDestroy,MatView); 2346 PLogObjectCreate(mat); 2347 mat->data = (void*)(a = PetscNew(Mat_MPISBAIJ));CHKPTRQ(a); 2348 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2349 mat->ops->destroy = MatDestroy_MPISBAIJ; 2350 mat->ops->view = MatView_MPISBAIJ; 2351 mat->factor = matin->factor; 2352 mat->assembled = PETSC_TRUE; 2353 mat->insertmode = NOT_SET_VALUES; 2354 2355 a->m = mat->m = oldmat->m; 2356 a->n = mat->n = oldmat->n; 2357 a->M = mat->M = oldmat->M; 2358 a->N = mat->N = oldmat->N; 2359 2360 a->bs = oldmat->bs; 2361 a->bs2 = oldmat->bs2; 2362 a->mbs = oldmat->mbs; 2363 a->nbs = oldmat->nbs; 2364 a->Mbs = oldmat->Mbs; 2365 a->Nbs = oldmat->Nbs; 2366 2367 a->rstart = oldmat->rstart; 2368 a->rend = oldmat->rend; 2369 a->cstart = oldmat->cstart; 2370 a->cend = oldmat->cend; 2371 a->size = oldmat->size; 2372 a->rank = oldmat->rank; 2373 a->donotstash = oldmat->donotstash; 2374 a->roworiented = oldmat->roworiented; 2375 a->rowindices = 0; 2376 a->rowvalues = 0; 2377 a->getrowactive = PETSC_FALSE; 2378 a->barray = 0; 2379 a->rstart_bs = oldmat->rstart_bs; 2380 a->rend_bs = oldmat->rend_bs; 2381 a->cstart_bs = oldmat->cstart_bs; 2382 a->cend_bs = oldmat->cend_bs; 2383 2384 /* hash table stuff */ 2385 a->ht = 0; 2386 a->hd = 0; 2387 a->ht_size = 0; 2388 a->ht_flag = oldmat->ht_flag; 2389 a->ht_fact = oldmat->ht_fact; 2390 a->ht_total_ct = 0; 2391 a->ht_insert_ct = 0; 2392 2393 2394 a->rowners = (int*)PetscMalloc(3*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners); 2395 PLogObjectMemory(mat,3*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ)); 2396 a->cowners = a->rowners + a->size + 2; 2397 a->rowners_bs = a->cowners + a->size + 2; 2398 ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr); 2399 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 2400 ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr); 2401 if (oldmat->colmap) { 2402 #if defined (PETSC_USE_CTABLE) 2403 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2404 #else 2405 a->colmap = (int*)PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap); 2406 PLogObjectMemory(mat,(a->Nbs)*sizeof(int)); 2407 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr); 2408 #endif 2409 } else a->colmap = 0; 2410 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2411 a->garray = (int*)PetscMalloc(len*sizeof(int));CHKPTRQ(a->garray); 2412 PLogObjectMemory(mat,len*sizeof(int)); 2413 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); 2414 } else a->garray = 0; 2415 2416 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2417 PLogObjectParent(mat,a->lvec); 2418 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2419 2420 PLogObjectParent(mat,a->Mvctx); 2421 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2422 PLogObjectParent(mat,a->A); 2423 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2424 PLogObjectParent(mat,a->B); 2425 ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr); 2426 ierr = FListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr); 2427 if (flg) { 2428 ierr = MatPrintHelp(mat);CHKERRQ(ierr); 2429 } 2430 *newmat = mat; 2431 PetscFunctionReturn(0); 2432 } 2433 2434 #include "petscsys.h" 2435 2436 #undef __FUNC__ 2437 #define __FUNC__ /*<a name=""></a>*/"MatLoad_MPISBAIJ" 2438 int MatLoad_MPISBAIJ(Viewer viewer,MatType type,Mat *newmat) 2439 { 2440 Mat A; 2441 int i,nz,ierr,j,rstart,rend,fd; 2442 Scalar *vals,*buf; 2443 MPI_Comm comm = ((PetscObject)viewer)->comm; 2444 MPI_Status status; 2445 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 2446 int *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf; 2447 int tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows; 2448 int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2449 int dcount,kmax,k,nzcount,tmp; 2450 2451 PetscFunctionBegin; 2452 ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2453 2454 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2455 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2456 if (!rank) { 2457 ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2458 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2459 if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object"); 2460 if (header[3] < 0) { 2461 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as MPISBAIJ"); 2462 } 2463 } 2464 2465 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 2466 M = header[1]; N = header[2]; 2467 2468 if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices"); 2469 2470 /* 2471 This code adds extra rows to make sure the number of rows is 2472 divisible by the blocksize 2473 */ 2474 Mbs = M/bs; 2475 extra_rows = bs - M + bs*(Mbs); 2476 if (extra_rows == bs) extra_rows = 0; 2477 else Mbs++; 2478 if (extra_rows &&!rank) { 2479 PLogInfo(0,"MatLoad_MPISBAIJ:Padding loaded matrix to match blocksize\n"); 2480 } 2481 2482 /* determine ownership of all rows */ 2483 mbs = Mbs/size + ((Mbs % size) > rank); 2484 m = mbs * bs; 2485 rowners = (int*)PetscMalloc(2*(size+2)*sizeof(int));CHKPTRQ(rowners); 2486 browners = rowners + size + 1; 2487 ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2488 rowners[0] = 0; 2489 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2490 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2491 rstart = rowners[rank]; 2492 rend = rowners[rank+1]; 2493 2494 /* distribute row lengths to all processors */ 2495 locrowlens = (int*)PetscMalloc((rend-rstart)*bs*sizeof(int));CHKPTRQ(locrowlens); 2496 if (!rank) { 2497 rowlengths = (int*)PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths); 2498 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2499 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2500 sndcounts = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(sndcounts); 2501 for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2502 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2503 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2504 } else { 2505 ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2506 } 2507 2508 if (!rank) { 2509 /* calculate the number of nonzeros on each processor */ 2510 procsnz = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(procsnz); 2511 ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 2512 for (i=0; i<size; i++) { 2513 for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 2514 procsnz[i] += rowlengths[j]; 2515 } 2516 } 2517 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2518 2519 /* determine max buffer needed and allocate it */ 2520 maxnz = 0; 2521 for (i=0; i<size; i++) { 2522 maxnz = PetscMax(maxnz,procsnz[i]); 2523 } 2524 cols = (int*)PetscMalloc(maxnz*sizeof(int));CHKPTRQ(cols); 2525 2526 /* read in my part of the matrix column indices */ 2527 nz = procsnz[0]; 2528 ibuf = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(ibuf); 2529 mycols = ibuf; 2530 if (size == 1) nz -= extra_rows; 2531 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2532 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2533 2534 /* read in every ones (except the last) and ship off */ 2535 for (i=1; i<size-1; i++) { 2536 nz = procsnz[i]; 2537 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2538 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 2539 } 2540 /* read in the stuff for the last proc */ 2541 if (size != 1) { 2542 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2543 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2544 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2545 ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr); 2546 } 2547 ierr = PetscFree(cols);CHKERRQ(ierr); 2548 } else { 2549 /* determine buffer space needed for message */ 2550 nz = 0; 2551 for (i=0; i<m; i++) { 2552 nz += locrowlens[i]; 2553 } 2554 ibuf = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(ibuf); 2555 mycols = ibuf; 2556 /* receive message of column indices*/ 2557 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2558 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2559 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 2560 } 2561 2562 /* loop over local rows, determining number of off diagonal entries */ 2563 dlens = (int*)PetscMalloc(2*(rend-rstart+1)*sizeof(int));CHKPTRQ(dlens); 2564 odlens = dlens + (rend-rstart); 2565 mask = (int*)PetscMalloc(3*Mbs*sizeof(int));CHKPTRQ(mask); 2566 ierr = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr); 2567 masked1 = mask + Mbs; 2568 masked2 = masked1 + Mbs; 2569 rowcount = 0; nzcount = 0; 2570 for (i=0; i<mbs; i++) { 2571 dcount = 0; 2572 odcount = 0; 2573 for (j=0; j<bs; j++) { 2574 kmax = locrowlens[rowcount]; 2575 for (k=0; k<kmax; k++) { 2576 tmp = mycols[nzcount++]/bs; 2577 if (!mask[tmp]) { 2578 mask[tmp] = 1; 2579 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; 2580 else masked1[dcount++] = tmp; 2581 } 2582 } 2583 rowcount++; 2584 } 2585 2586 dlens[i] = dcount; 2587 odlens[i] = odcount; 2588 2589 /* zero out the mask elements we set */ 2590 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2591 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2592 } 2593 2594 /* create our matrix */ 2595 ierr = MatCreateMPISBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);CHKERRQ(ierr); 2596 A = *newmat; 2597 MatSetOption(A,MAT_COLUMNS_SORTED); 2598 2599 if (!rank) { 2600 buf = (Scalar*)PetscMalloc(maxnz*sizeof(Scalar));CHKPTRQ(buf); 2601 /* read in my part of the matrix numerical values */ 2602 nz = procsnz[0]; 2603 vals = buf; 2604 mycols = ibuf; 2605 if (size == 1) nz -= extra_rows; 2606 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2607 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2608 2609 /* insert into matrix */ 2610 jj = rstart*bs; 2611 for (i=0; i<m; i++) { 2612 ierr = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2613 mycols += locrowlens[i]; 2614 vals += locrowlens[i]; 2615 jj++; 2616 } 2617 /* read in other processors (except the last one) and ship out */ 2618 for (i=1; i<size-1; i++) { 2619 nz = procsnz[i]; 2620 vals = buf; 2621 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2622 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2623 } 2624 /* the last proc */ 2625 if (size != 1){ 2626 nz = procsnz[i] - extra_rows; 2627 vals = buf; 2628 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2629 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2630 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 2631 } 2632 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2633 } else { 2634 /* receive numeric values */ 2635 buf = (Scalar*)PetscMalloc(nz*sizeof(Scalar));CHKPTRQ(buf); 2636 2637 /* receive message of values*/ 2638 vals = buf; 2639 mycols = ibuf; 2640 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2641 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2642 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 2643 2644 /* insert into matrix */ 2645 jj = rstart*bs; 2646 for (i=0; i<m; i++) { 2647 ierr = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2648 mycols += locrowlens[i]; 2649 vals += locrowlens[i]; 2650 jj++; 2651 } 2652 } 2653 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2654 ierr = PetscFree(buf);CHKERRQ(ierr); 2655 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2656 ierr = PetscFree(rowners);CHKERRQ(ierr); 2657 ierr = PetscFree(dlens);CHKERRQ(ierr); 2658 ierr = PetscFree(mask);CHKERRQ(ierr); 2659 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2660 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2661 PetscFunctionReturn(0); 2662 } 2663 2664 #undef __FUNC__ 2665 #define __FUNC__ /*<a name=""></a>*/"MatMPISBAIJSetHashTableFactor" 2666 /*@ 2667 MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2668 2669 Input Parameters: 2670 . mat - the matrix 2671 . fact - factor 2672 2673 Collective on Mat 2674 2675 Level: advanced 2676 2677 Notes: 2678 This can also be set by the command line option: -mat_use_hash_table fact 2679 2680 .keywords: matrix, hashtable, factor, HT 2681 2682 .seealso: MatSetOption() 2683 @*/ 2684 int MatMPISBAIJSetHashTableFactor(Mat mat,PetscReal fact) 2685 { 2686 Mat_MPIBAIJ *baij; 2687 2688 PetscFunctionBegin; 2689 PetscValidHeaderSpecific(mat,MAT_COOKIE); 2690 if (mat->type != MATMPIBAIJ) { 2691 SETERRQ(PETSC_ERR_ARG_WRONG,1,"Incorrect matrix type. Use MPIBAIJ only."); 2692 } 2693 baij = (Mat_MPIBAIJ*)mat->data; 2694 baij->ht_fact = fact; 2695 PetscFunctionReturn(0); 2696 } 2697