1 #ifdef PETSC_RCS_HEADER 2 static char vcid[] = "$Id: mpibaij.c,v 1.113 1998/03/16 18:55:44 bsmith Exp bsmith $"; 3 #endif 4 5 #include "pinclude/pviewer.h" /*I "mat.h" I*/ 6 #include "src/mat/impls/baij/mpi/mpibaij.h" 7 #include "src/vec/vecimpl.h" 8 9 10 extern int MatSetUpMultiply_MPIBAIJ(Mat); 11 extern int DisAssemble_MPIBAIJ(Mat); 12 extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int); 13 extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatGetSubMatrixCall,Mat **); 14 15 /* 16 Local utility routine that creates a mapping from the global column 17 number to the local number in the off-diagonal part of the local 18 storage of the matrix. This is done in a non scable way since the 19 length of colmap equals the global matrix length. 20 */ 21 #undef __FUNC__ 22 #define __FUNC__ "CreateColmap_MPIBAIJ_Private" 23 static int CreateColmap_MPIBAIJ_Private(Mat mat) 24 { 25 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 26 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data; 27 int nbs = B->nbs,i,bs=B->bs;; 28 29 PetscFunctionBegin; 30 baij->colmap = (int *) PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap); 31 PLogObjectMemory(mat,baij->Nbs*sizeof(int)); 32 PetscMemzero(baij->colmap,baij->Nbs*sizeof(int)); 33 for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1; 34 PetscFunctionReturn(0); 35 } 36 37 #define CHUNKSIZE 10 38 39 #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 40 { \ 41 \ 42 brow = row/bs; \ 43 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 44 rmax = aimax[brow]; nrow = ailen[brow]; \ 45 bcol = col/bs; \ 46 ridx = row % bs; cidx = col % bs; \ 47 low = 0; high = nrow; \ 48 while (high-low > 3) { \ 49 t = (low+high)/2; \ 50 if (rp[t] > bcol) high = t; \ 51 else low = t; \ 52 } \ 53 for ( _i=low; _i<high; _i++ ) { \ 54 if (rp[_i] > bcol) break; \ 55 if (rp[_i] == bcol) { \ 56 bap = ap + bs2*_i + bs*cidx + ridx; \ 57 if (addv == ADD_VALUES) *bap += value; \ 58 else *bap = value; \ 59 goto a_noinsert; \ 60 } \ 61 } \ 62 if 
(a->nonew == 1) goto a_noinsert; \ 63 else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 64 if (nrow >= rmax) { \ 65 /* there is no extra room in row, therefore enlarge */ \ 66 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 67 Scalar *new_a; \ 68 \ 69 if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 70 \ 71 /* malloc new storage space */ \ 72 len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); \ 73 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 74 new_j = (int *) (new_a + bs2*new_nz); \ 75 new_i = new_j + new_nz; \ 76 \ 77 /* copy over old data into new slots */ \ 78 for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} \ 79 for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 80 PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); \ 81 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \ 82 PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, \ 83 len*sizeof(int)); \ 84 PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); \ 85 PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \ 86 PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \ 87 aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar)); \ 88 /* free up old matrix storage */ \ 89 PetscFree(a->a); \ 90 if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \ 91 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 92 a->singlemalloc = 1; \ 93 \ 94 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 95 rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \ 96 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \ 97 a->maxnz += bs2*CHUNKSIZE; \ 98 a->reallocs++; \ 99 a->nz++; \ 100 } \ 101 N = nrow++ - 1; \ 102 /* shift up all the later entries in this row */ \ 103 for ( ii=N; ii>=_i; ii-- ) { \ 104 rp[ii+1] = rp[ii]; \ 105 PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \ 106 } \ 107 if (N>=_i) 
PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar)); \ 108 rp[_i] = bcol; \ 109 ap[bs2*_i + bs*cidx + ridx] = value; \ 110 a_noinsert:; \ 111 ailen[brow] = nrow; \ 112 } 113 114 #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 115 { \ 116 \ 117 brow = row/bs; \ 118 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 119 rmax = bimax[brow]; nrow = bilen[brow]; \ 120 bcol = col/bs; \ 121 ridx = row % bs; cidx = col % bs; \ 122 low = 0; high = nrow; \ 123 while (high-low > 3) { \ 124 t = (low+high)/2; \ 125 if (rp[t] > bcol) high = t; \ 126 else low = t; \ 127 } \ 128 for ( _i=low; _i<high; _i++ ) { \ 129 if (rp[_i] > bcol) break; \ 130 if (rp[_i] == bcol) { \ 131 bap = ap + bs2*_i + bs*cidx + ridx; \ 132 if (addv == ADD_VALUES) *bap += value; \ 133 else *bap = value; \ 134 goto b_noinsert; \ 135 } \ 136 } \ 137 if (b->nonew == 1) goto b_noinsert; \ 138 else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 139 if (nrow >= rmax) { \ 140 /* there is no extra room in row, therefore enlarge */ \ 141 int new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 142 Scalar *new_a; \ 143 \ 144 if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 145 \ 146 /* malloc new storage space */ \ 147 len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(b->mbs+1)*sizeof(int); \ 148 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 149 new_j = (int *) (new_a + bs2*new_nz); \ 150 new_i = new_j + new_nz; \ 151 \ 152 /* copy over old data into new slots */ \ 153 for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = bi[ii];} \ 154 for ( ii=brow+1; ii<b->mbs+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 155 PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int)); \ 156 len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \ 157 PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow, \ 158 len*sizeof(int)); \ 159 PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(Scalar)); \ 160 
PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \ 161 PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \ 162 ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(Scalar)); \ 163 /* free up old matrix storage */ \ 164 PetscFree(b->a); \ 165 if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \ 166 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 167 b->singlemalloc = 1; \ 168 \ 169 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 170 rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \ 171 PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \ 172 b->maxnz += bs2*CHUNKSIZE; \ 173 b->reallocs++; \ 174 b->nz++; \ 175 } \ 176 N = nrow++ - 1; \ 177 /* shift up all the later entries in this row */ \ 178 for ( ii=N; ii>=_i; ii-- ) { \ 179 rp[ii+1] = rp[ii]; \ 180 PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \ 181 } \ 182 if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar)); \ 183 rp[_i] = bcol; \ 184 ap[bs2*_i + bs*cidx + ridx] = value; \ 185 b_noinsert:; \ 186 bilen[brow] = nrow; \ 187 } 188 189 #undef __FUNC__ 190 #define __FUNC__ "MatSetValues_MPIBAIJ" 191 int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 192 { 193 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 194 Scalar value; 195 int ierr,i,j,row,col; 196 int roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ; 197 int rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs; 198 int cend_orig=baij->cend_bs,bs=baij->bs; 199 200 /* Some Variables required in the macro */ 201 Mat A = baij->A; 202 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) (A)->data; 203 int *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 204 Scalar *aa=a->a; 205 206 Mat B = baij->B; 207 Mat_SeqBAIJ *b = (Mat_SeqBAIJ *) (B)->data; 208 int *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 209 Scalar *ba=b->a; 210 211 int *rp,ii,nrow,_i,rmax,N,brow,bcol; 212 int low,high,t,ridx,cidx,bs2=a->bs2; 213 Scalar *ap,*bap; 214 215 PetscFunctionBegin; 216 for ( i=0; i<m; i++ ) { 217 #if 
defined(USE_PETSC_BOPT_g) 218 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 219 if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 220 #endif 221 if (im[i] >= rstart_orig && im[i] < rend_orig) { 222 row = im[i] - rstart_orig; 223 for ( j=0; j<n; j++ ) { 224 if (in[j] >= cstart_orig && in[j] < cend_orig){ 225 col = in[j] - cstart_orig; 226 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 227 MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 228 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 229 } 230 #if defined(USE_PETSC_BOPT_g) 231 else if (in[j] < 0) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");} 232 else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Col too large");} 233 #endif 234 else { 235 if (mat->was_assembled) { 236 if (!baij->colmap) { 237 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 238 } 239 col = baij->colmap[in[j]/bs] - 1 + in[j]%bs; 240 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 241 ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 242 col = in[j]; 243 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 244 B = baij->B; 245 b = (Mat_SeqBAIJ *) (B)->data; 246 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 247 ba=b->a; 248 } 249 } else col = in[j]; 250 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 251 MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 252 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 253 } 254 } 255 } else { 256 if (roworiented && !baij->donotstash) { 257 ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 258 } else { 259 if (!baij->donotstash) { 260 row = im[i]; 261 for ( j=0; j<n; j++ ) { 262 ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 263 } 264 } 265 } 266 } 267 } 268 PetscFunctionReturn(0); 269 } 270 271 extern int 
MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 272 #undef __FUNC__ 273 #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ" 274 int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 275 { 276 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 277 Scalar *value,*barray=baij->barray; 278 int ierr,i,j,ii,jj,row,col,k,l; 279 int roworiented = baij->roworiented,rstart=baij->rstart ; 280 int rend=baij->rend,cstart=baij->cstart,stepval; 281 int cend=baij->cend,bs=baij->bs,bs2=baij->bs2; 282 283 if(!barray) { 284 baij->barray = barray = (Scalar*) PetscMalloc(bs2*sizeof(Scalar)); CHKPTRQ(barray); 285 } 286 287 if (roworiented) { 288 stepval = (n-1)*bs; 289 } else { 290 stepval = (m-1)*bs; 291 } 292 for ( i=0; i<m; i++ ) { 293 #if defined(USE_PETSC_BOPT_g) 294 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 295 if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 296 #endif 297 if (im[i] >= rstart && im[i] < rend) { 298 row = im[i] - rstart; 299 for ( j=0; j<n; j++ ) { 300 /* If NumCol = 1 then a copy is not required */ 301 if ((roworiented) && (n == 1)) { 302 barray = v + i*bs2; 303 } else if((!roworiented) && (m == 1)) { 304 barray = v + j*bs2; 305 } else { /* Here a copy is required */ 306 if (roworiented) { 307 value = v + i*(stepval+bs)*bs + j*bs; 308 } else { 309 value = v + j*(stepval+bs)*bs + i*bs; 310 } 311 for ( ii=0; ii<bs; ii++,value+=stepval ) { 312 for (jj=0; jj<bs; jj++ ) { 313 *barray++ = *value++; 314 } 315 } 316 barray -=bs2; 317 } 318 319 if (in[j] >= cstart && in[j] < cend){ 320 col = in[j] - cstart; 321 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 322 } 323 #if defined(USE_PETSC_BOPT_g) 324 else if (in[j] < 0) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");} 325 else if (in[j] >= baij->Nbs) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");} 326 #endif 327 else { 328 if (mat->was_assembled) { 329 if 
(!baij->colmap) { 330 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 331 } 332 333 #if defined(USE_PETSC_BOPT_g) 334 if ((baij->colmap[in[j]] - 1) % bs) {SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");} 335 #endif 336 col = (baij->colmap[in[j]] - 1)/bs; 337 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 338 ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 339 col = in[j]; 340 } 341 } 342 else col = in[j]; 343 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 344 } 345 } 346 } else { 347 if (!baij->donotstash) { 348 if (roworiented ) { 349 row = im[i]*bs; 350 value = v + i*(stepval+bs)*bs; 351 for ( j=0; j<bs; j++,row++ ) { 352 for ( k=0; k<n; k++ ) { 353 for ( col=in[k]*bs,l=0; l<bs; l++,col++) { 354 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr); 355 } 356 } 357 } 358 } else { 359 for ( j=0; j<n; j++ ) { 360 value = v + j*(stepval+bs)*bs + i*bs; 361 col = in[j]*bs; 362 for ( k=0; k<bs; k++,col++,value+=stepval) { 363 for ( row = im[i]*bs,l=0; l<bs; l++,row++) { 364 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr); 365 } 366 } 367 } 368 } 369 } 370 } 371 } 372 PetscFunctionReturn(0); 373 } 374 #include <math.h> 375 #define HASH_KEY 0.6180339887 376 /* #define HASH1(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 377 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp))) 378 /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 379 #undef __FUNC__ 380 #define __FUNC__ "MatSetValues_MPIBAIJ_HT" 381 int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 382 { 383 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 384 int ierr,i,j,row,col; 385 int roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ; 386 int rend_orig=baij->rend_bs,Nbs=baij->Nbs; 387 int h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx; 388 double tmp; 389 Scalar ** HD = baij->hd,value; 390 #if 
defined(USE_PETSC_BOPT_g) 391 int total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 392 #endif 393 394 PetscFunctionBegin; 395 396 for ( i=0; i<m; i++ ) { 397 #if defined(USE_PETSC_BOPT_g) 398 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 399 if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 400 #endif 401 row = im[i]; 402 if (row >= rstart_orig && row < rend_orig) { 403 for ( j=0; j<n; j++ ) { 404 col = in[j]; 405 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 406 /* Look up into the Hash Table */ 407 key = (row/bs)*Nbs+(col/bs)+1; 408 h1 = HASH(size,key,tmp); 409 410 411 idx = h1; 412 #if defined(USE_PETSC_BOPT_g) 413 insert_ct++; 414 total_ct++; 415 if (HT[idx] != key) { 416 for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 417 if (idx == size) { 418 for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 419 if (idx == h1) { 420 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 421 } 422 } 423 } 424 #else 425 if (HT[idx] != key) { 426 for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++); 427 if (idx == size) { 428 for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++); 429 if (idx == h1) { 430 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 431 } 432 } 433 } 434 #endif 435 /* A HASH table entry is found, so insert the values at the correct address */ 436 if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 437 else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 438 } 439 } else { 440 if (roworiented && !baij->donotstash) { 441 ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 442 } else { 443 if (!baij->donotstash) { 444 row = im[i]; 445 for ( j=0; j<n; j++ ) { 446 ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 447 } 448 } 449 } 450 } 451 } 452 #if defined(USE_PETSC_BOPT_g) 453 baij->ht_total_ct = total_ct; 454 baij->ht_insert_ct = insert_ct; 455 
#endif 456 PetscFunctionReturn(0); 457 } 458 459 #undef __FUNC__ 460 #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ_HT" 461 int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 462 { 463 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 464 int ierr,i,j,ii,jj,row,col,k,l; 465 int roworiented = baij->roworiented,rstart=baij->rstart ; 466 int rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2; 467 int h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 468 double tmp; 469 Scalar ** HD = baij->hd,*value,*v_t,*baij_a; 470 #if defined(USE_PETSC_BOPT_g) 471 int total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 472 #endif 473 474 PetscFunctionBegin; 475 476 if (roworiented) { 477 stepval = (n-1)*bs; 478 } else { 479 stepval = (m-1)*bs; 480 } 481 for ( i=0; i<m; i++ ) { 482 #if defined(USE_PETSC_BOPT_g) 483 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 484 if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 485 #endif 486 row = im[i]; 487 v_t = v + i*bs2; 488 if (row >= rstart && row < rend) { 489 for ( j=0; j<n; j++ ) { 490 col = in[j]; 491 492 /* Look up into the Hash Table */ 493 key = row*Nbs+col+1; 494 h1 = HASH(size,key,tmp); 495 496 idx = h1; 497 #if defined(USE_PETSC_BOPT_g) 498 total_ct++; 499 insert_ct++; 500 if (HT[idx] != key) { 501 for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 502 if (idx == size) { 503 for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 504 if (idx == h1) { 505 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 506 } 507 } 508 } 509 #else 510 if (HT[idx] != key) { 511 for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++); 512 if (idx == size) { 513 for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++); 514 if (idx == h1) { 515 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 516 } 517 } 518 } 519 #endif 520 baij_a = HD[idx]; 521 if (roworiented) { 522 /*value = v + 
i*(stepval+bs)*bs + j*bs;*/ 523 /* value = v + (i*(stepval+bs)+j)*bs; */ 524 value = v_t; 525 v_t += bs; 526 if (addv == ADD_VALUES) { 527 for ( ii=0; ii<bs; ii++,value+=stepval) { 528 for ( jj=ii; jj<bs2; jj+=bs ) { 529 baij_a[jj] += *value++; 530 } 531 } 532 } else { 533 for ( ii=0; ii<bs; ii++,value+=stepval) { 534 for ( jj=ii; jj<bs2; jj+=bs ) { 535 baij_a[jj] = *value++; 536 } 537 } 538 } 539 } else { 540 value = v + j*(stepval+bs)*bs + i*bs; 541 if (addv == ADD_VALUES) { 542 for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) { 543 for ( jj=0; jj<bs; jj++ ) { 544 baij_a[jj] += *value++; 545 } 546 } 547 } else { 548 for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) { 549 for ( jj=0; jj<bs; jj++ ) { 550 baij_a[jj] = *value++; 551 } 552 } 553 } 554 } 555 } 556 } else { 557 if (!baij->donotstash) { 558 if (roworiented ) { 559 row = im[i]*bs; 560 value = v + i*(stepval+bs)*bs; 561 for ( j=0; j<bs; j++,row++ ) { 562 for ( k=0; k<n; k++ ) { 563 for ( col=in[k]*bs,l=0; l<bs; l++,col++) { 564 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr); 565 } 566 } 567 } 568 } else { 569 for ( j=0; j<n; j++ ) { 570 value = v + j*(stepval+bs)*bs + i*bs; 571 col = in[j]*bs; 572 for ( k=0; k<bs; k++,col++,value+=stepval) { 573 for ( row = im[i]*bs,l=0; l<bs; l++,row++) { 574 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr); 575 } 576 } 577 } 578 } 579 } 580 } 581 } 582 #if defined(USE_PETSC_BOPT_g) 583 baij->ht_total_ct = total_ct; 584 baij->ht_insert_ct = insert_ct; 585 #endif 586 PetscFunctionReturn(0); 587 } 588 589 #undef __FUNC__ 590 #define __FUNC__ "MatGetValues_MPIBAIJ" 591 int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 592 { 593 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 594 int bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs; 595 int bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col; 596 597 PetscFunctionBegin; 598 for ( i=0; i<m; i++ ) { 599 if 
(idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 600 if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 601 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 602 row = idxm[i] - bsrstart; 603 for ( j=0; j<n; j++ ) { 604 if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column"); 605 if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large"); 606 if (idxn[j] >= bscstart && idxn[j] < bscend){ 607 col = idxn[j] - bscstart; 608 ierr = MatGetValues(baij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 609 } else { 610 if (!baij->colmap) { 611 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 612 } 613 if((baij->colmap[idxn[j]/bs]-1 < 0) || 614 (baij->garray[(baij->colmap[idxn[j]/bs]-1)/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 615 else { 616 col = (baij->colmap[idxn[j]/bs]-1) + idxn[j]%bs; 617 ierr = MatGetValues(baij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 618 } 619 } 620 } 621 } else { 622 SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported"); 623 } 624 } 625 PetscFunctionReturn(0); 626 } 627 628 #undef __FUNC__ 629 #define __FUNC__ "MatNorm_MPIBAIJ" 630 int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm) 631 { 632 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 633 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data; 634 int ierr, i,bs2=baij->bs2; 635 double sum = 0.0; 636 Scalar *v; 637 638 PetscFunctionBegin; 639 if (baij->size == 1) { 640 ierr = MatNorm(baij->A,type,norm); CHKERRQ(ierr); 641 } else { 642 if (type == NORM_FROBENIUS) { 643 v = amat->a; 644 for (i=0; i<amat->nz*bs2; i++ ) { 645 #if defined(USE_PETSC_COMPLEX) 646 sum += real(conj(*v)*(*v)); v++; 647 #else 648 sum += (*v)*(*v); v++; 649 #endif 650 } 651 v = bmat->a; 652 for (i=0; i<bmat->nz*bs2; i++ ) { 653 #if defined(USE_PETSC_COMPLEX) 654 sum += real(conj(*v)*(*v)); v++; 655 #else 656 sum += (*v)*(*v); v++; 657 #endif 658 } 659 ierr = 
MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 660 *norm = sqrt(*norm); 661 } else { 662 SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet"); 663 } 664 } 665 PetscFunctionReturn(0); 666 } 667 668 #undef __FUNC__ 669 #define __FUNC__ "MatAssemblyBegin_MPIBAIJ" 670 int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 671 { 672 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 673 MPI_Comm comm = mat->comm; 674 int size = baij->size, *owners = baij->rowners,bs=baij->bs; 675 int rank = baij->rank,tag = mat->tag, *owner,*starts,count,ierr; 676 MPI_Request *send_waits,*recv_waits; 677 int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 678 InsertMode addv; 679 Scalar *rvalues,*svalues; 680 681 PetscFunctionBegin; 682 /* make sure all processors are either in INSERTMODE or ADDMODE */ 683 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr); 684 if (addv == (ADD_VALUES|INSERT_VALUES)) { 685 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added"); 686 } 687 mat->insertmode = addv; /* in case this processor had no cache */ 688 689 /* first count number of contributors to each processor */ 690 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 691 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 692 owner = (int *) PetscMalloc( (baij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 693 for ( i=0; i<baij->stash.n; i++ ) { 694 idx = baij->stash.idx[i]; 695 for ( j=0; j<size; j++ ) { 696 if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 697 nprocs[j]++; procs[j] = 1; owner[i] = j; break; 698 } 699 } 700 } 701 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 702 703 /* inform other processors of number of messages and max length*/ 704 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 705 ierr = MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 706 nreceives = work[rank]; 707 ierr = MPI_Allreduce( nprocs, 
work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 708 nmax = work[rank]; 709 PetscFree(work); 710 711 /* post receives: 712 1) each message will consist of ordered pairs 713 (global index,value) we store the global index as a double 714 to simplify the message passing. 715 2) since we don't know how long each individual message is we 716 allocate the largest needed buffer for each receive. Potentially 717 this is a lot of wasted space. 718 719 720 This could be done better. 721 */ 722 rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));CHKPTRQ(rvalues); 723 recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 724 for ( i=0; i<nreceives; i++ ) { 725 ierr = MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 726 } 727 728 /* do sends: 729 1) starts[i] gives the starting index in svalues for stuff going to 730 the ith processor 731 */ 732 svalues = (Scalar *) PetscMalloc(3*(baij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 733 send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 734 starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 735 starts[0] = 0; 736 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 737 for ( i=0; i<baij->stash.n; i++ ) { 738 svalues[3*starts[owner[i]]] = (Scalar) baij->stash.idx[i]; 739 svalues[3*starts[owner[i]]+1] = (Scalar) baij->stash.idy[i]; 740 svalues[3*(starts[owner[i]]++)+2] = baij->stash.array[i]; 741 } 742 PetscFree(owner); 743 starts[0] = 0; 744 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 745 count = 0; 746 for ( i=0; i<size; i++ ) { 747 if (procs[i]) { 748 ierr = MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 749 } 750 } 751 PetscFree(starts); PetscFree(nprocs); 752 753 /* Free cache space */ 754 PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Number of off-processor values 
%d\n",baij->stash.n); 755 ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr); 756 757 baij->svalues = svalues; baij->rvalues = rvalues; 758 baij->nsends = nsends; baij->nrecvs = nreceives; 759 baij->send_waits = send_waits; baij->recv_waits = recv_waits; 760 baij->rmax = nmax; 761 762 PetscFunctionReturn(0); 763 } 764 765 /* 766 Creates the hash table, and sets the table 767 This table is created only once. 768 If new entried need to be added to the matrix 769 then the hash table has to be destroyed and 770 recreated. 771 */ 772 #undef __FUNC__ 773 #define __FUNC__ "MatCreateHashTable_MPIBAIJ_Private" 774 int MatCreateHashTable_MPIBAIJ_Private(Mat mat,double factor) 775 { 776 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 777 Mat A = baij->A, B=baij->B; 778 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data, *b=(Mat_SeqBAIJ *)B->data; 779 int i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 780 int size,bs2=baij->bs2,rstart=baij->rstart; 781 int cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs; 782 int *HT,key; 783 Scalar **HD; 784 double tmp; 785 #if defined(USE_PETSC_BOPT_g) 786 int ct=0,max=0; 787 #endif 788 789 PetscFunctionBegin; 790 baij->ht_size=(int)(factor*nz); 791 size = baij->ht_size; 792 793 if (baij->ht) { 794 PetscFunctionReturn(0); 795 } 796 797 /* Allocate Memory for Hash Table */ 798 baij->hd = (Scalar**)PetscMalloc((size)*(sizeof(int)+sizeof(Scalar*))+1); CHKPTRQ(baij->hd); 799 baij->ht = (int*)(baij->hd + size); 800 HD = baij->hd; 801 HT = baij->ht; 802 803 804 PetscMemzero(HD,size*(sizeof(int)+sizeof(Scalar*))); 805 806 807 /* Loop Over A */ 808 for ( i=0; i<a->mbs; i++ ) { 809 for ( j=ai[i]; j<ai[i+1]; j++ ) { 810 row = i+rstart; 811 col = aj[j]+cstart; 812 813 key = row*Nbs + col + 1; 814 h1 = HASH(size,key,tmp); 815 for ( k=0; k<size; k++ ){ 816 if (HT[(h1+k)%size] == 0.0) { 817 HT[(h1+k)%size] = key; 818 HD[(h1+k)%size] = a->a + j*bs2; 819 break; 820 #if defined(USE_PETSC_BOPT_g) 821 } else { 822 ct++; 823 #endif 824 } 825 
} 826 #if defined(USE_PETSC_BOPT_g) 827 if (k> max) max = k; 828 #endif 829 } 830 } 831 /* Loop Over B */ 832 for ( i=0; i<b->mbs; i++ ) { 833 for ( j=bi[i]; j<bi[i+1]; j++ ) { 834 row = i+rstart; 835 col = garray[bj[j]]; 836 key = row*Nbs + col + 1; 837 h1 = HASH(size,key,tmp); 838 for ( k=0; k<size; k++ ){ 839 if (HT[(h1+k)%size] == 0.0) { 840 HT[(h1+k)%size] = key; 841 HD[(h1+k)%size] = b->a + j*bs2; 842 break; 843 #if defined(USE_PETSC_BOPT_g) 844 } else { 845 ct++; 846 #endif 847 } 848 } 849 #if defined(USE_PETSC_BOPT_g) 850 if (k> max) max = k; 851 #endif 852 } 853 } 854 855 /* Print Summary */ 856 #if defined(USE_PETSC_BOPT_g) 857 for ( i=0,j=0; i<size; i++) 858 if (HT[i]) {j++;} 859 PLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n", 860 (j== 0)? 0.0:((double)(ct+j))/j,max); 861 #endif 862 PetscFunctionReturn(0); 863 } 864 865 #undef __FUNC__ 866 #define __FUNC__ "MatAssemblyEnd_MPIBAIJ" 867 int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 868 { 869 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 870 MPI_Status *send_status,recv_status; 871 int imdex,nrecvs = baij->nrecvs, count = nrecvs, i, n, ierr; 872 int bs=baij->bs,row,col,other_disassembled; 873 Scalar *values,val; 874 InsertMode addv = mat->insertmode; 875 876 PetscFunctionBegin; 877 /* wait on receives */ 878 while (count) { 879 ierr = MPI_Waitany(nrecvs,baij->recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 880 /* unpack receives into our local space */ 881 values = baij->rvalues + 3*imdex*baij->rmax; 882 ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,&n);CHKERRQ(ierr); 883 n = n/3; 884 for ( i=0; i<n; i++ ) { 885 row = (int) PetscReal(values[3*i]) - baij->rstart*bs; 886 col = (int) PetscReal(values[3*i+1]); 887 val = values[3*i+2]; 888 if (col >= baij->cstart*bs && col < baij->cend*bs) { 889 col -= baij->cstart*bs; 890 ierr = MatSetValues(baij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr) 891 } else { 892 if (mat->was_assembled) { 893 if (!baij->colmap) { 
894 ierr = CreateColmap_MPIBAIJ_Private(mat); CHKERRQ(ierr); 895 } 896 col = (baij->colmap[col/bs]) - 1 + col%bs; 897 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 898 ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 899 col = (int) PetscReal(values[3*i+1]); 900 } 901 } 902 ierr = MatSetValues(baij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr) 903 } 904 } 905 count--; 906 } 907 PetscFree(baij->recv_waits); PetscFree(baij->rvalues); 908 909 /* wait on sends */ 910 if (baij->nsends) { 911 send_status = (MPI_Status *) PetscMalloc(baij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 912 ierr = MPI_Waitall(baij->nsends,baij->send_waits,send_status);CHKERRQ(ierr); 913 PetscFree(send_status); 914 } 915 PetscFree(baij->send_waits); PetscFree(baij->svalues); 916 917 ierr = MatAssemblyBegin(baij->A,mode); CHKERRQ(ierr); 918 ierr = MatAssemblyEnd(baij->A,mode); CHKERRQ(ierr); 919 920 /* determine if any processor has disassembled, if so we must 921 also disassemble ourselfs, in order that we may reassemble. 
*/
  /* MPI_PROD of the logical flags: the product is 0 iff at least one rank
     was not previously assembled (i.e. some rank disassembled). */
  ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
  if (mat->was_assembled && !other_disassembled) {
    ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
  }

  /* First final assembly: build the scatter context for the off-diagonal part */
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat); CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(baij->B,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode); CHKERRQ(ierr);

#if defined(USE_PETSC_BOPT_g)
  /* Debug builds: report average hash-table probe length, then reset counters */
  if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
    PLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",
             ((double)baij->ht_total_ct)/baij->ht_insert_ct);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  /* If requested, switch to hash-table based MatSetValues after the first assembly */
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact); CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  /* workspace used by MatGetRow() is invalid after new assembly; release it */
  if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;}
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIBAIJ_Binary"
/*
   MatView_MPIBAIJ_Binary - Binary viewer; currently only implemented for a
   single process (views the local diagonal block, which then is the whole matrix).
*/
static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
  } else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworMatlab"
/*
   MatView_MPIBAIJ_ASCIIorDraworMatlab - Handles ASCII, Draw and Matlab viewers.
   For the INFO formats only summary statistics are printed; otherwise the whole
   matrix is gathered onto process 0 (non-scalable) and viewed there.
*/
static int MatView_MPIBAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr, format,rank,bs = baij->bs;
  FILE        *fd;
  ViewerType  vtype;

  PetscFunctionBegin;
  ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
  if (vtype == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
    /* NOTE(review): return code of ViewerGetFormat() is not CHKERRQ-checked here */
    ierr = ViewerGetFormat(viewer,&format);
    if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
      /* per-process nonzero/memory statistics, printed one process at a time */
      MatInfo info;
      MPI_Comm_rank(mat->comm,&rank);
      ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);
      PetscSequentialPhaseBegin(mat->comm,1);
      fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
              rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
              baij->bs,(int)info.memory);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);
      fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);
      fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
      fflush(fd);
      PetscSequentialPhaseEnd(mat->comm,1);
      ierr = VecScatterView(baij->Mvctx,viewer); CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == VIEWER_FORMAT_ASCII_INFO) {
      PetscPrintf(mat->comm," block size is %d\n",bs);
      PetscFunctionReturn(0);
    }
  }

  if (vtype == DRAW_VIEWER) {
    Draw       draw;
    PetscTruth isnull;
    ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
    ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (vtype == ASCII_FILE_VIEWER) {
    /* print ownership ranges and the two local blocks, process by process */
    ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
    PetscSequentialPhaseBegin(mat->comm,1);
    fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
            baij->rank,baij->m,baij->rstart*bs,baij->rend*bs,baij->n,
            baij->cstart*bs,baij->cend*bs);
    ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
    ierr = MatView(baij->B,viewer); CHKERRQ(ierr);
    fflush(fd);
    PetscSequentialPhaseEnd(mat->comm,1);
  } else {
    int size = baij->size;
    rank = baij->rank;
    if (size == 1) {
      ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
    } else {
      /* assemble the entire matrix onto first processor.
*/
      Mat         A;
      Mat_SeqBAIJ *Aloc;
      int         M = baij->M, N = baij->N,*ai,*aj,col,i,j,k,*rvals;
      int         mbs = baij->mbs;
      Scalar      *a;

      /* process 0 gets all rows; every other process contributes 0 local rows */
      if (!rank) {
        ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
      } else {
        ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
      }
      PLogObjectParent(mat,A);

      /* copy over the A part: local block columns are shifted by cstart */
      Aloc = (Mat_SeqBAIJ*) baij->A->data;
      ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
      /* rvals holds the bs consecutive global row indices of block row i */
      rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);

      for ( i=0; i<mbs; i++ ) {
        rvals[0] = bs*(baij->rstart + i);
        for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
        for ( j=ai[i]; j<ai[i+1]; j++ ) {
          col = (baij->cstart+aj[j])*bs;
          for (k=0; k<bs; k++ ) {
            /* values are stored column-within-block contiguously (bs entries) */
            ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
            col++; a += bs;
          }
        }
      }
      /* copy over the B part: garray maps local off-diagonal block column
         numbers back to global block columns */
      Aloc = (Mat_SeqBAIJ*) baij->B->data;
      ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
      for ( i=0; i<mbs; i++ ) {
        rvals[0] = bs*(baij->rstart + i);
        for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
        for ( j=ai[i]; j<ai[i+1]; j++ ) {
          col = baij->garray[aj[j]]*bs;
          for (k=0; k<bs; k++ ) {
            ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
            col++; a += bs;
          }
        }
      }
      PetscFree(rvals);
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      /*
         Everyone has to call to draw the matrix since the graphics waits are
         synchronized across all processors that share the Draw object
      */
      if (!rank || vtype == DRAW_VIEWER) {
        ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
      }
      ierr = MatDestroy(A); CHKERRQ(ierr);
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIBAIJ" 1088 int MatView_MPIBAIJ(Mat mat,Viewer viewer) 1089 { 1090 int ierr; 1091 ViewerType vtype; 1092 1093 PetscFunctionBegin; 1094 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 1095 if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER || 1096 vtype == DRAW_VIEWER || vtype == MATLAB_VIEWER) { 1097 ierr = MatView_MPIBAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 1098 } else if (vtype == BINARY_FILE_VIEWER) { 1099 ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1100 } 1101 PetscFunctionReturn(0); 1102 } 1103 1104 #undef __FUNC__ 1105 #define __FUNC__ "MatDestroy_MPIBAIJ" 1106 int MatDestroy_MPIBAIJ(Mat mat) 1107 { 1108 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1109 int ierr; 1110 1111 PetscFunctionBegin; 1112 #if defined(USE_PETSC_LOG) 1113 PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",baij->M,baij->N); 1114 #endif 1115 1116 ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr); 1117 PetscFree(baij->rowners); 1118 ierr = MatDestroy(baij->A); CHKERRQ(ierr); 1119 ierr = MatDestroy(baij->B); CHKERRQ(ierr); 1120 if (baij->colmap) PetscFree(baij->colmap); 1121 if (baij->garray) PetscFree(baij->garray); 1122 if (baij->lvec) VecDestroy(baij->lvec); 1123 if (baij->Mvctx) VecScatterDestroy(baij->Mvctx); 1124 if (baij->rowvalues) PetscFree(baij->rowvalues); 1125 if (baij->barray) PetscFree(baij->barray); 1126 if (baij->hd) PetscFree(baij->hd); 1127 PetscFree(baij); 1128 PLogObjectDestroy(mat); 1129 PetscHeaderDestroy(mat); 1130 PetscFunctionReturn(0); 1131 } 1132 1133 #undef __FUNC__ 1134 #define __FUNC__ "MatMult_MPIBAIJ" 1135 int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1136 { 1137 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1138 int ierr, nt; 1139 1140 PetscFunctionBegin; 1141 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1142 if (nt != a->n) { 1143 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx"); 1144 } 1145 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1146 if (nt != a->m) { 1147 
SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible parition of A and yy"); 1148 } 1149 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1150 ierr = (*a->A->ops->mult)(a->A,xx,yy); CHKERRQ(ierr); 1151 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1152 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 1153 ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1154 PetscFunctionReturn(0); 1155 } 1156 1157 #undef __FUNC__ 1158 #define __FUNC__ "MatMultAdd_MPIBAIJ" 1159 int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1160 { 1161 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1162 int ierr; 1163 1164 PetscFunctionBegin; 1165 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1166 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 1167 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1168 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 1169 PetscFunctionReturn(0); 1170 } 1171 1172 #undef __FUNC__ 1173 #define __FUNC__ "MatMultTrans_MPIBAIJ" 1174 int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy) 1175 { 1176 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1177 int ierr; 1178 1179 PetscFunctionBegin; 1180 /* do nondiagonal part */ 1181 ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 1182 /* send it on its way */ 1183 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1184 /* do local part */ 1185 ierr = (*a->A->ops->multtrans)(a->A,xx,yy); CHKERRQ(ierr); 1186 /* receive remote parts: note this assumes the values are not actually */ 1187 /* inserted in yy until the next line, which is true for my implementation*/ 1188 /* but is not perhaps always true. 
*/ 1189 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1190 PetscFunctionReturn(0); 1191 } 1192 1193 #undef __FUNC__ 1194 #define __FUNC__ "MatMultTransAdd_MPIBAIJ" 1195 int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1196 { 1197 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1198 int ierr; 1199 1200 PetscFunctionBegin; 1201 /* do nondiagonal part */ 1202 ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 1203 /* send it on its way */ 1204 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 1205 /* do local part */ 1206 ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 1207 /* receive remote parts: note this assumes the values are not actually */ 1208 /* inserted in yy until the next line, which is true for my implementation*/ 1209 /* but is not perhaps always true. */ 1210 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 1211 PetscFunctionReturn(0); 1212 } 1213 1214 /* 1215 This only works correctly for square matrices where the subblock A->A is the 1216 diagonal block 1217 */ 1218 #undef __FUNC__ 1219 #define __FUNC__ "MatGetDiagonal_MPIBAIJ" 1220 int MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1221 { 1222 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1223 int ierr; 1224 1225 PetscFunctionBegin; 1226 if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block"); 1227 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1228 PetscFunctionReturn(0); 1229 } 1230 1231 #undef __FUNC__ 1232 #define __FUNC__ "MatScale_MPIBAIJ" 1233 int MatScale_MPIBAIJ(Scalar *aa,Mat A) 1234 { 1235 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1236 int ierr; 1237 1238 PetscFunctionBegin; 1239 ierr = MatScale(aa,a->A); CHKERRQ(ierr); 1240 ierr = MatScale(aa,a->B); CHKERRQ(ierr); 1241 PetscFunctionReturn(0); 1242 } 1243 1244 #undef __FUNC__ 1245 #define __FUNC__ "MatGetSize_MPIBAIJ" 1246 int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n) 1247 
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;

  PetscFunctionBegin;
  if (m) *m = mat->M;
  if (n) *n = mat->N;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetLocalSize_MPIBAIJ"
/* MatGetLocalSize_MPIBAIJ - Returns the local (point, not block) dimensions. */
int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;

  PetscFunctionBegin;
  *m = mat->m; *n = mat->n;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetOwnershipRange_MPIBAIJ"
/*
   MatGetOwnershipRange_MPIBAIJ - Returns [rstart*bs, rend*bs), the range of
   global point rows owned by this process.
*/
int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;

  PetscFunctionBegin;
  *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs;
  PetscFunctionReturn(0);
}

extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);

#undef __FUNC__
#define __FUNC__ "MatGetRow_MPIBAIJ"
/*
   MatGetRow_MPIBAIJ - Returns one locally owned (global) row as global column
   indices and values, merged in increasing column order from the diagonal (A)
   and off-diagonal (B) blocks. Only one row may be "active" at a time; callers
   must pair with MatRestoreRow.
*/
int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
  Scalar      *vworkA, *vworkB, **pvA, **pvB,*v_p;
  int         bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB;
  int         nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs;
  int         *cmap, *idx_p,cstart = mat->cstart;

  PetscFunctionBegin;
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data;
    int         max = 1,mbs = mat->mbs,tmp;
    /* max block nonzeros in any block row, counting both A and B parts */
    for ( i=0; i<mbs; i++ ) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    /* single allocation: values first, then the index array laid over the tail */
    mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar)));
    CHKPTRQ(mat->rowvalues);
    mat->rowindices = (int *) (mat->rowvalues + max*bs2);
  }

  if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,0,"Only local rows")
  lrow = row - brstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap  = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        /* B entries with global block column < cstart come before all A entries */
        for ( i=0; i<nzB; i++ ) {
          if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
        for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for ( i=0; i<imark; i++ ) {
            /* translate local B column to global: block part via cmap, point offset kept */
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          /* v was not requested, so imark must be recomputed here */
          for ( i=0; i<nzB; i++ ) {
            if (cmap[cworkB[i]/bs] < cstart)
              idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
            else break;
          }
          imark = i;
        }
        for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart*bs + cworkA[i];
        for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr =
(*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1359 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1360 PetscFunctionReturn(0); 1361 } 1362 1363 #undef __FUNC__ 1364 #define __FUNC__ "MatRestoreRow_MPIBAIJ" 1365 int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1366 { 1367 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1368 1369 PetscFunctionBegin; 1370 if (baij->getrowactive == PETSC_FALSE) { 1371 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called"); 1372 } 1373 baij->getrowactive = PETSC_FALSE; 1374 PetscFunctionReturn(0); 1375 } 1376 1377 #undef __FUNC__ 1378 #define __FUNC__ "MatGetBlockSize_MPIBAIJ" 1379 int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs) 1380 { 1381 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1382 1383 PetscFunctionBegin; 1384 *bs = baij->bs; 1385 PetscFunctionReturn(0); 1386 } 1387 1388 #undef __FUNC__ 1389 #define __FUNC__ "MatZeroEntries_MPIBAIJ" 1390 int MatZeroEntries_MPIBAIJ(Mat A) 1391 { 1392 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 1393 int ierr; 1394 1395 PetscFunctionBegin; 1396 ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 1397 ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 1398 PetscFunctionReturn(0); 1399 } 1400 1401 #undef __FUNC__ 1402 #define __FUNC__ "MatGetInfo_MPIBAIJ" 1403 int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1404 { 1405 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) matin->data; 1406 Mat A = a->A, B = a->B; 1407 int ierr; 1408 double isend[5], irecv[5]; 1409 1410 PetscFunctionBegin; 1411 info->block_size = (double)a->bs; 1412 ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr); 1413 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->memory; 1414 ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr); 1415 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->memory; 1416 if (flag == MAT_LOCAL) { 1417 info->nz_used = isend[0]; 1418 info->nz_allocated = isend[1]; 1419 info->nz_unneeded = isend[2]; 
1420 info->memory = isend[3]; 1421 info->mallocs = isend[4]; 1422 } else if (flag == MAT_GLOBAL_MAX) { 1423 ierr = MPI_Allreduce(isend,irecv,5,MPI_INT,MPI_MAX,matin->comm);CHKERRQ(ierr); 1424 info->nz_used = irecv[0]; 1425 info->nz_allocated = irecv[1]; 1426 info->nz_unneeded = irecv[2]; 1427 info->memory = irecv[3]; 1428 info->mallocs = irecv[4]; 1429 } else if (flag == MAT_GLOBAL_SUM) { 1430 ierr = MPI_Allreduce(isend,irecv,5,MPI_INT,MPI_SUM,matin->comm);CHKERRQ(ierr); 1431 info->nz_used = irecv[0]; 1432 info->nz_allocated = irecv[1]; 1433 info->nz_unneeded = irecv[2]; 1434 info->memory = irecv[3]; 1435 info->mallocs = irecv[4]; 1436 } 1437 info->rows_global = (double)a->M; 1438 info->columns_global = (double)a->N; 1439 info->rows_local = (double)a->m; 1440 info->columns_local = (double)a->N; 1441 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1442 info->fill_ratio_needed = 0; 1443 info->factor_mallocs = 0; 1444 PetscFunctionReturn(0); 1445 } 1446 1447 #undef __FUNC__ 1448 #define __FUNC__ "MatSetOption_MPIBAIJ" 1449 int MatSetOption_MPIBAIJ(Mat A,MatOption op) 1450 { 1451 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1452 1453 PetscFunctionBegin; 1454 if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 1455 op == MAT_YES_NEW_NONZERO_LOCATIONS || 1456 op == MAT_COLUMNS_UNSORTED || 1457 op == MAT_COLUMNS_SORTED || 1458 op == MAT_NEW_NONZERO_ALLOCATION_ERROR || 1459 op == MAT_NEW_NONZERO_LOCATION_ERROR) { 1460 MatSetOption(a->A,op); 1461 MatSetOption(a->B,op); 1462 } else if (op == MAT_ROW_ORIENTED) { 1463 a->roworiented = 1; 1464 MatSetOption(a->A,op); 1465 MatSetOption(a->B,op); 1466 } else if (op == MAT_ROWS_SORTED || 1467 op == MAT_ROWS_UNSORTED || 1468 op == MAT_SYMMETRIC || 1469 op == MAT_STRUCTURALLY_SYMMETRIC || 1470 op == MAT_YES_NEW_DIAGONALS || 1471 op == MAT_USE_HASH_TABLE) 1472 PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 1473 else if (op == MAT_COLUMN_ORIENTED) { 1474 a->roworiented = 0; 1475 MatSetOption(a->A,op); 1476 
MatSetOption(a->B,op); 1477 } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 1478 a->donotstash = 1; 1479 } else if (op == MAT_NO_NEW_DIAGONALS) { 1480 SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 1481 } else if (op == MAT_USE_HASH_TABLE) { 1482 a->ht_flag = 1; 1483 } else { 1484 SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 1485 } 1486 PetscFunctionReturn(0); 1487 } 1488 1489 #undef __FUNC__ 1490 #define __FUNC__ "MatTranspose_MPIBAIJ(" 1491 int MatTranspose_MPIBAIJ(Mat A,Mat *matout) 1492 { 1493 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data; 1494 Mat_SeqBAIJ *Aloc; 1495 Mat B; 1496 int ierr,M=baij->M,N=baij->N,*ai,*aj,i,*rvals,j,k,col; 1497 int bs=baij->bs,mbs=baij->mbs; 1498 Scalar *a; 1499 1500 PetscFunctionBegin; 1501 if (matout == PETSC_NULL && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place"); 1502 ierr = MatCreateMPIBAIJ(A->comm,baij->bs,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,PETSC_NULL,&B); 1503 CHKERRQ(ierr); 1504 1505 /* copy over the A part */ 1506 Aloc = (Mat_SeqBAIJ*) baij->A->data; 1507 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1508 rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals); 1509 1510 for ( i=0; i<mbs; i++ ) { 1511 rvals[0] = bs*(baij->rstart + i); 1512 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 1513 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1514 col = (baij->cstart+aj[j])*bs; 1515 for (k=0; k<bs; k++ ) { 1516 ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1517 col++; a += bs; 1518 } 1519 } 1520 } 1521 /* copy over the B part */ 1522 Aloc = (Mat_SeqBAIJ*) baij->B->data; 1523 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1524 for ( i=0; i<mbs; i++ ) { 1525 rvals[0] = bs*(baij->rstart + i); 1526 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 1527 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1528 col = baij->garray[aj[j]]*bs; 1529 for (k=0; k<bs; k++ ) { 1530 ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1531 col++; a += bs; 1532 } 1533 } 1534 } 1535 PetscFree(rvals); 
1536 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1537 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1538 1539 if (matout != PETSC_NULL) { 1540 *matout = B; 1541 } else { 1542 PetscOps *Abops; 1543 struct _MatOps *Aops; 1544 1545 /* This isn't really an in-place transpose .... but free data structures from baij */ 1546 PetscFree(baij->rowners); 1547 ierr = MatDestroy(baij->A); CHKERRQ(ierr); 1548 ierr = MatDestroy(baij->B); CHKERRQ(ierr); 1549 if (baij->colmap) PetscFree(baij->colmap); 1550 if (baij->garray) PetscFree(baij->garray); 1551 if (baij->lvec) VecDestroy(baij->lvec); 1552 if (baij->Mvctx) VecScatterDestroy(baij->Mvctx); 1553 PetscFree(baij); 1554 1555 /* 1556 This is horrible, horrible code. We need to keep the 1557 A pointers for the bops and ops but copy everything 1558 else from C. 1559 */ 1560 Abops = A->bops; 1561 Aops = A->ops; 1562 PetscMemcpy(A,B,sizeof(struct _p_Mat)); 1563 A->bops = Abops; 1564 A->ops = Aops; 1565 1566 PetscHeaderDestroy(B); 1567 } 1568 PetscFunctionReturn(0); 1569 } 1570 1571 #undef __FUNC__ 1572 #define __FUNC__ "MatDiagonalScale_MPIBAIJ" 1573 int MatDiagonalScale_MPIBAIJ(Mat A,Vec ll,Vec rr) 1574 { 1575 Mat a = ((Mat_MPIBAIJ *) A->data)->A; 1576 Mat b = ((Mat_MPIBAIJ *) A->data)->B; 1577 int ierr,s1,s2,s3; 1578 1579 PetscFunctionBegin; 1580 if (ll) { 1581 ierr = VecGetLocalSize(ll,&s1); CHKERRQ(ierr); 1582 ierr = MatGetLocalSize(A,&s2,&s3); CHKERRQ(ierr); 1583 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"non-conforming local sizes"); 1584 ierr = MatDiagonalScale(a,ll,0); CHKERRQ(ierr); 1585 ierr = MatDiagonalScale(b,ll,0); CHKERRQ(ierr); 1586 } 1587 if (rr) SETERRQ(PETSC_ERR_SUP,0,"not supported for right vector"); 1588 PetscFunctionReturn(0); 1589 } 1590 1591 #undef __FUNC__ 1592 #define __FUNC__ "MatZeroRows_MPIBAIJ" 1593 int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag) 1594 { 1595 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 1596 int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 1597 int 
              *procs,*nprocs,j,found,idx,nsends,*work,row;
  int         nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int         *rvalues,tag = A->tag,count,base,slen,n,*source;
  int         *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs;
  MPI_Comm    comm = A->comm;
  MPI_Request *send_waits,*recv_waits;
  MPI_Status  recv_status,*send_status;
  IS          istmp;

  PetscFunctionBegin;
  ierr = ISGetSize(is,&N); CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
  for ( i=0; i<N; i++ ) {
    idx = rows[i];
    found = 0;
    /* owners[] holds block-row ranges; scale by bs to compare point rows */
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  /* SUM of the 0/1 flags gives each rank its number of incoming messages */
  ierr = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
  nrecvs = work[rank];
  /* MAX of the counts bounds the longest message this rank will receive */
  ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
  nmax = work[rank];
  PetscFree(work);

  /* post receives:  */
  rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  /* bucket the row indices by destination rank */
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ISRestoreIndices(is,&rows);

  /* the loop above advanced starts[]; rebuild it before issuing the sends */
  starts[0] = 0;
  for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  PetscFree(starts);

  /* first global point row owned by this rank */
  base = owners[rank]*bs;

  /*  wait on receives */
  lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
  source = lens + nrecvs;
  count = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex]  = recv_status.MPI_SOURCE;
    lens[imdex]  = n;
    slen += n;
    count--;
  }
  PetscFree(recv_waits);

  /* move the data into the send scatter */
  lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base; /* convert to local row numbers */
    }
  }
  PetscFree(rvalues); PetscFree(lens);
  PetscFree(owner); PetscFree(nprocs);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PLogObjectParent(A,istmp);

  ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr);
  ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
  ierr = ISDestroy(istmp); CHKERRQ(ierr);

  /* optionally put *diag on the diagonal of every zeroed row */
  if (diag) {
    for ( i = 0; i < slen; i++ ) {
      row = lrows[i] + rstart_bs;
      ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES); CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  }
  PetscFree(lrows);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    PetscFree(send_status);
  }
  PetscFree(send_waits); PetscFree(svalues);

  PetscFunctionReturn(0);
}

extern int MatPrintHelp_SeqBAIJ(Mat);

#undef __FUNC__
#define __FUNC__ "MatPrintHelp_MPIBAIJ"
/*
   MatPrintHelp_MPIBAIJ - Prints the runtime options for this format; the
   static 'called' flag ensures the text is printed only once per run.
*/
int MatPrintHelp_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data;
  MPI_Comm    comm = A->comm;
  static int  called = 0;
  int         ierr;

  PetscFunctionBegin;
  if (!a->rank) {
    ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr);
  }
  if (called) {PetscFunctionReturn(0);} else called = 1;
  (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");
  (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatSetUnfactored_MPIBAIJ"
/* MatSetUnfactored_MPIBAIJ - Resets the factored state of the diagonal block. */
int MatSetUnfactored_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static int MatConvertSameType_MPIBAIJ(Mat,Mat *,int);

/* -------------------------------------------------------------------*/
/* function table for the MPIBAIJ matrix type; zero entries are unsupported ops */
static struct _MatOps MatOps = {
  MatSetValues_MPIBAIJ,MatGetRow_MPIBAIJ,MatRestoreRow_MPIBAIJ,MatMult_MPIBAIJ,
MatMultAdd_MPIBAIJ,MatMultTrans_MPIBAIJ,MatMultTransAdd_MPIBAIJ,0, 1760 0,0,0,0, 1761 0,0,MatTranspose_MPIBAIJ,MatGetInfo_MPIBAIJ, 1762 0,MatGetDiagonal_MPIBAIJ,MatDiagonalScale_MPIBAIJ,MatNorm_MPIBAIJ, 1763 MatAssemblyBegin_MPIBAIJ,MatAssemblyEnd_MPIBAIJ,0,MatSetOption_MPIBAIJ, 1764 MatZeroEntries_MPIBAIJ,MatZeroRows_MPIBAIJ,0, 1765 0,0,0,MatGetSize_MPIBAIJ, 1766 MatGetLocalSize_MPIBAIJ,MatGetOwnershipRange_MPIBAIJ,0,0, 1767 0,0,MatConvertSameType_MPIBAIJ,0,0, 1768 0,0,0,MatGetSubMatrices_MPIBAIJ, 1769 MatIncreaseOverlap_MPIBAIJ,MatGetValues_MPIBAIJ,0,MatPrintHelp_MPIBAIJ, 1770 MatScale_MPIBAIJ,0,0,0,MatGetBlockSize_MPIBAIJ, 1771 0,0,0,0,0,0,MatSetUnfactored_MPIBAIJ,0,MatSetValuesBlocked_MPIBAIJ}; 1772 1773 1774 #undef __FUNC__ 1775 #define __FUNC__ "MatCreateMPIBAIJ" 1776 /*@C 1777 MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 1778 (block compressed row). For good matrix assembly performance 1779 the user should preallocate the matrix storage by setting the parameters 1780 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1781 performance can be increased by more than a factor of 50. 1782 1783 Input Parameters: 1784 . comm - MPI communicator 1785 . bs - size of blockk 1786 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1787 This value should be the same as the local size used in creating the 1788 y vector for the matrix-vector product y = Ax. 1789 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1790 This value should be the same as the local size used in creating the 1791 x vector for the matrix-vector product y = Ax. 1792 . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 1793 . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 1794 . d_nz - number of block nonzeros per block row in diagonal portion of local 1795 submatrix (same for all local rows) 1796 . 
d_nzz - array containing the number of block nonzeros in the various block rows 
        of the in diagonal portion of the local (possibly different for each block
        row) or PETSC_NULL.  You must leave room for the diagonal entry even if
        it is zero.
.  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
           submatrix (same for all local rows).
.  o_nzz - array containing the number of nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or PETSC_NULL.

   Output Parameter:
.  A - the matrix 

   Notes:
   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Storage Information:
   For a square global matrix we define each processor's diagonal portion 
   to be its local rows and the corresponding columns (a square submatrix);  
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix). 

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set 
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

$           0 1 2 3 4 5 6 7 8 9 10 11
$          -------------------
$   row 3  | o o o d d d o o o o o o
$   row 4  | o o o d d d o o o o o o
$   row 5  | o o o d d d o o o o o o
$          -------------------
$ 

   Thus, any entries in the d locations are stored in the d (diagonal) 
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
   stored simply in the MATSEQBAIJ format for compressed row storage.

   Now d_nz should indicate the number of block nonzeros per row in the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues()
@*/
int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,
                     int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
{
  Mat         B;
  Mat_MPIBAIJ *b;
  int         ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size,flg;

  PetscFunctionBegin;
  if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid block size specified, must be positive");

  /* on one process simply create a sequential BAIJ matrix */
  MPI_Comm_size(comm,&size);
  if (size == 1) {
    if (M == PETSC_DECIDE) M = m;
    if (N == PETSC_DECIDE) N = n;
    ierr = MatCreateSeqBAIJ(comm,bs,M,N,d_nz,d_nnz,A); CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  *A = 0;
  PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,comm,MatDestroy,MatView);
  PLogObjectCreate(B);
  B->data = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b);
  PetscMemzero(b,sizeof(Mat_MPIBAIJ));
  PetscMemcpy(B->ops,&MatOps,sizeof(struct _MatOps));

  B->ops->destroy = MatDestroy_MPIBAIJ;
  B->ops->view    = MatView_MPIBAIJ;
  B->mapping    = 0;
  B->factor     = 0;
  B->assembled  = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  MPI_Comm_rank(comm,&b->rank);
  MPI_Comm_size(comm,&b->size);

  /* validate the local/global size combinations */
  if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
  }
  if ( M == PETSC_DECIDE && m == PETSC_DECIDE) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either M or m should be specified");
  }
  if ( N == PETSC_DECIDE && n == PETSC_DECIDE) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either N or n should be specified");
  }
  /* when both are given, the local sizes win and the global ones are recomputed */
  if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE;
  if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE;

  if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
    work[0] = m; work[1] = n;
    mbs = m/bs; nbs = n/bs;
    /* global sizes are the sums of the local sizes */
    ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr);
    if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;}
    if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;}
  }
  if (m == PETSC_DECIDE) {
    Mbs = M/bs;
    if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global rows must be divisible by blocksize");
    /* spread the block rows as evenly as possible; low ranks get the remainder */
    mbs = Mbs/b->size + ((Mbs % b->size) > b->rank);
    m   = mbs*bs;
  }
  if (n == PETSC_DECIDE) {
    Nbs = N/bs;
    if (Nbs*bs != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global cols must be divisible by blocksize");
    nbs = Nbs/b->size + ((Nbs % b->size) > b->rank);
    n   = nbs*bs;
  }
  if (mbs*bs != m || nbs*bs != n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of local rows, cols must be divisible by blocksize");
  }

  /* cache point (lower case) and block (upper/bs suffixed) dimensions */
  b->m = m; B->m = m;
  b->n = n; B->n = n;
  b->N = N; B->N = N;
  b->M = M; B->M = M;
  b->bs  = bs;
  b->bs2 = bs*bs;
  b->mbs = mbs;
  b->nbs = nbs;
  b->Mbs = Mbs;
  b->Nbs = Nbs;

  /* build local table of row and column ownerships */
  b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
  PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
  b->cowners = b->rowners + b->size + 2;
  /* gather every rank's block-row count, then prefix-sum into ownership ranges */
  ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  b->rowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->rowners[i] += b->rowners[i-1];
  }
  b->rstart    = b->rowners[b->rank];
  b->rend      = b->rowners[b->rank+1];
  b->rstart_bs = b->rstart * bs;
  b->rend_bs   = b->rend * bs;

  ierr = MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  b->cowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->cowners[i] += b->cowners[i-1];
  }
  b->cstart    = b->cowners[b->rank];
  b->cend      = b->cowners[b->rank+1];
  b->cstart_bs = b->cstart * bs;
  b->cend_bs   = b->cend * bs;


  /* the diagonal block is m x n local; the off-diagonal block is m x N
     (full global width, columns compacted later via garray) */
  if (d_nz == PETSC_DEFAULT) d_nz = 5;
  ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
  PLogObjectParent(B,b->A);
  if (o_nz == PETSC_DEFAULT) o_nz = 0;
  ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
  PLogObjectParent(B,b->B);

  /* build cache for off array entries formed */
  ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
  b->donotstash  = 0;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = 1;

  /* stuff used in block assembly */
  b->barray = 0;

  /* stuff used for matrix vector multiply */
  b->lvec  = 0;
  b->Mvctx = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = 0;
  b->hd           = 0;
  b->ht_size      = 0;
  b->ht_flag      = 0;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  *A = B;
  ierr = OptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg); CHKERRQ(ierr);
  if (flg) {
    double fact = 1.39;
    ierr = MatSetOption(B,MAT_USE_HASH_TABLE); CHKERRQ(ierr);
    ierr = OptionsGetDouble(PETSC_NULL,"-mat_use_hash_table",&fact,&flg); CHKERRQ(ierr);
    if (fact <= 1.0) fact = 1.39;
    ierr =
MatMPIBAIJSetHashTableFactor(B,fact); CHKERRQ(ierr); 2001 PLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact); 2002 } 2003 PetscFunctionReturn(0); 2004 } 2005 2006 #undef __FUNC__ 2007 #define __FUNC__ "MatConvertSameType_MPIBAIJ" 2008 static int MatConvertSameType_MPIBAIJ(Mat matin,Mat *newmat,int cpvalues) 2009 { 2010 Mat mat; 2011 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data; 2012 int ierr, len=0, flg; 2013 2014 PetscFunctionBegin; 2015 *newmat = 0; 2016 PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,matin->comm,MatDestroy,MatView); 2017 PLogObjectCreate(mat); 2018 mat->data = (void *) (a = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(a); 2019 PetscMemcpy(mat->ops,&MatOps,sizeof(struct _MatOps)); 2020 mat->ops->destroy = MatDestroy_MPIBAIJ; 2021 mat->ops->view = MatView_MPIBAIJ; 2022 mat->factor = matin->factor; 2023 mat->assembled = PETSC_TRUE; 2024 2025 a->m = mat->m = oldmat->m; 2026 a->n = mat->n = oldmat->n; 2027 a->M = mat->M = oldmat->M; 2028 a->N = mat->N = oldmat->N; 2029 2030 a->bs = oldmat->bs; 2031 a->bs2 = oldmat->bs2; 2032 a->mbs = oldmat->mbs; 2033 a->nbs = oldmat->nbs; 2034 a->Mbs = oldmat->Mbs; 2035 a->Nbs = oldmat->Nbs; 2036 2037 a->rstart = oldmat->rstart; 2038 a->rend = oldmat->rend; 2039 a->cstart = oldmat->cstart; 2040 a->cend = oldmat->cend; 2041 a->size = oldmat->size; 2042 a->rank = oldmat->rank; 2043 mat->insertmode = NOT_SET_VALUES; 2044 a->rowvalues = 0; 2045 a->getrowactive = PETSC_FALSE; 2046 a->barray = 0; 2047 2048 /* hash table stuff */ 2049 a->ht = 0; 2050 a->hd = 0; 2051 a->ht_size = 0; 2052 a->ht_flag = oldmat->ht_flag; 2053 a->ht_fact = oldmat->ht_fact; 2054 a->ht_total_ct = 0; 2055 a->ht_insert_ct = 0; 2056 2057 2058 a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 2059 PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 2060 a->cowners = a->rowners + a->size + 2; 2061 
PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int)); 2062 ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr); 2063 if (oldmat->colmap) { 2064 a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap); 2065 PLogObjectMemory(mat,(a->Nbs)*sizeof(int)); 2066 PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int)); 2067 } else a->colmap = 0; 2068 if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) { 2069 a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray); 2070 PLogObjectMemory(mat,len*sizeof(int)); 2071 PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); 2072 } else a->garray = 0; 2073 2074 ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 2075 PLogObjectParent(mat,a->lvec); 2076 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 2077 PLogObjectParent(mat,a->Mvctx); 2078 ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr); 2079 PLogObjectParent(mat,a->A); 2080 ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr); 2081 PLogObjectParent(mat,a->B); 2082 ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr); 2083 if (flg) { 2084 ierr = MatPrintHelp(mat); CHKERRQ(ierr); 2085 } 2086 *newmat = mat; 2087 PetscFunctionReturn(0); 2088 } 2089 2090 #include "sys.h" 2091 2092 #undef __FUNC__ 2093 #define __FUNC__ "MatLoad_MPIBAIJ" 2094 int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat) 2095 { 2096 Mat A; 2097 int i, nz, ierr, j,rstart, rend, fd; 2098 Scalar *vals,*buf; 2099 MPI_Comm comm = ((PetscObject)viewer)->comm; 2100 MPI_Status status; 2101 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 2102 int *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf; 2103 int flg,tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows; 2104 int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2105 int dcount,kmax,k,nzcount,tmp; 2106 2107 PetscFunctionBegin; 2108 ierr = 
OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr); 2109 2110 MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 2111 if (!rank) { 2112 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 2113 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr); 2114 if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object"); 2115 if (header[3] < 0) { 2116 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as MPIBAIJ"); 2117 } 2118 } 2119 2120 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 2121 M = header[1]; N = header[2]; 2122 2123 if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices"); 2124 2125 /* 2126 This code adds extra rows to make sure the number of rows is 2127 divisible by the blocksize 2128 */ 2129 Mbs = M/bs; 2130 extra_rows = bs - M + bs*(Mbs); 2131 if (extra_rows == bs) extra_rows = 0; 2132 else Mbs++; 2133 if (extra_rows &&!rank) { 2134 PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n"); 2135 } 2136 2137 /* determine ownership of all rows */ 2138 mbs = Mbs/size + ((Mbs % size) > rank); 2139 m = mbs * bs; 2140 rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int)); CHKPTRQ(rowners); 2141 browners = rowners + size + 1; 2142 ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2143 rowners[0] = 0; 2144 for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1]; 2145 for ( i=0; i<=size; i++ ) browners[i] = rowners[i]*bs; 2146 rstart = rowners[rank]; 2147 rend = rowners[rank+1]; 2148 2149 /* distribute row lengths to all processors */ 2150 locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) ); CHKPTRQ(locrowlens); 2151 if (!rank) { 2152 rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) ); CHKPTRQ(rowlengths); 2153 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr); 2154 for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 2155 sndcounts = (int*) PetscMalloc( 
size*sizeof(int) ); CHKPTRQ(sndcounts); 2156 for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i]; 2157 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2158 PetscFree(sndcounts); 2159 } else { 2160 ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm);CHKERRQ(ierr); 2161 } 2162 2163 if (!rank) { 2164 /* calculate the number of nonzeros on each processor */ 2165 procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 2166 PetscMemzero(procsnz,size*sizeof(int)); 2167 for ( i=0; i<size; i++ ) { 2168 for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) { 2169 procsnz[i] += rowlengths[j]; 2170 } 2171 } 2172 PetscFree(rowlengths); 2173 2174 /* determine max buffer needed and allocate it */ 2175 maxnz = 0; 2176 for ( i=0; i<size; i++ ) { 2177 maxnz = PetscMax(maxnz,procsnz[i]); 2178 } 2179 cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 2180 2181 /* read in my part of the matrix column indices */ 2182 nz = procsnz[0]; 2183 ibuf = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf); 2184 mycols = ibuf; 2185 if (size == 1) nz -= extra_rows; 2186 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr); 2187 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2188 2189 /* read in every ones (except the last) and ship off */ 2190 for ( i=1; i<size-1; i++ ) { 2191 nz = procsnz[i]; 2192 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr); 2193 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 2194 } 2195 /* read in the stuff for the last proc */ 2196 if ( size != 1 ) { 2197 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2198 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr); 2199 for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i; 2200 ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr); 2201 } 2202 PetscFree(cols); 2203 } else { 2204 /* determine 
buffer space needed for message */ 2205 nz = 0; 2206 for ( i=0; i<m; i++ ) { 2207 nz += locrowlens[i]; 2208 } 2209 ibuf = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf); 2210 mycols = ibuf; 2211 /* receive message of column indices*/ 2212 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2213 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2214 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 2215 } 2216 2217 /* loop over local rows, determining number of off diagonal entries */ 2218 dlens = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) ); CHKPTRQ(dlens); 2219 odlens = dlens + (rend-rstart); 2220 mask = (int *) PetscMalloc( 3*Mbs*sizeof(int) ); CHKPTRQ(mask); 2221 PetscMemzero(mask,3*Mbs*sizeof(int)); 2222 masked1 = mask + Mbs; 2223 masked2 = masked1 + Mbs; 2224 rowcount = 0; nzcount = 0; 2225 for ( i=0; i<mbs; i++ ) { 2226 dcount = 0; 2227 odcount = 0; 2228 for ( j=0; j<bs; j++ ) { 2229 kmax = locrowlens[rowcount]; 2230 for ( k=0; k<kmax; k++ ) { 2231 tmp = mycols[nzcount++]/bs; 2232 if (!mask[tmp]) { 2233 mask[tmp] = 1; 2234 if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp; 2235 else masked1[dcount++] = tmp; 2236 } 2237 } 2238 rowcount++; 2239 } 2240 2241 dlens[i] = dcount; 2242 odlens[i] = odcount; 2243 2244 /* zero out the mask elements we set */ 2245 for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0; 2246 for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0; 2247 } 2248 2249 /* create our matrix */ 2250 ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat); 2251 CHKERRQ(ierr); 2252 A = *newmat; 2253 MatSetOption(A,MAT_COLUMNS_SORTED); 2254 2255 if (!rank) { 2256 buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(buf); 2257 /* read in my part of the matrix numerical values */ 2258 nz = procsnz[0]; 2259 vals = buf; 2260 mycols = ibuf; 2261 if (size == 1) nz -= extra_rows; 2262 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); 
CHKERRQ(ierr); 2263 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2264 2265 /* insert into matrix */ 2266 jj = rstart*bs; 2267 for ( i=0; i<m; i++ ) { 2268 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2269 mycols += locrowlens[i]; 2270 vals += locrowlens[i]; 2271 jj++; 2272 } 2273 /* read in other processors (except the last one) and ship out */ 2274 for ( i=1; i<size-1; i++ ) { 2275 nz = procsnz[i]; 2276 vals = buf; 2277 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 2278 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2279 } 2280 /* the last proc */ 2281 if ( size != 1 ){ 2282 nz = procsnz[i] - extra_rows; 2283 vals = buf; 2284 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 2285 for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0; 2286 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 2287 } 2288 PetscFree(procsnz); 2289 } else { 2290 /* receive numeric values */ 2291 buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(buf); 2292 2293 /* receive message of values*/ 2294 vals = buf; 2295 mycols = ibuf; 2296 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2297 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2298 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 2299 2300 /* insert into matrix */ 2301 jj = rstart*bs; 2302 for ( i=0; i<m; i++ ) { 2303 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2304 mycols += locrowlens[i]; 2305 vals += locrowlens[i]; 2306 jj++; 2307 } 2308 } 2309 PetscFree(locrowlens); 2310 PetscFree(buf); 2311 PetscFree(ibuf); 2312 PetscFree(rowners); 2313 PetscFree(dlens); 2314 PetscFree(mask); 2315 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 2316 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 2317 PetscFunctionReturn(0); 2318 } 2319 2320 2321 2322 #undef 
__FUNC__ 2323 #define __FUNC__ "MatMPIBAIJSetHashTableFactor" 2324 /*@ 2325 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2326 2327 Input Parameters: 2328 . mat - the matrix 2329 . fact - factor 2330 2331 Notes: 2332 This can also be set by the command line option: -mat_use_hash_table fact 2333 2334 .keywords: matrix, hashtable, factor, HT 2335 2336 .seealso: MatSetOption() 2337 @*/ 2338 int MatMPIBAIJSetHashTableFactor(Mat mat,double fact) 2339 { 2340 Mat_MPIBAIJ *baij; 2341 2342 PetscFunctionBegin; 2343 PetscValidHeaderSpecific(mat,MAT_COOKIE); 2344 if (mat->type != MATMPIBAIJ) { 2345 SETERRQ(PETSC_ERR_ARG_WRONG,1,"Incorrect matrix type. Use MPIBAIJ only."); 2346 } 2347 baij = (Mat_MPIBAIJ*) mat->data; 2348 baij->ht_fact = fact; 2349 PetscFunctionReturn(0); 2350 } 2351