1 #ifdef PETSC_RCS_HEADER 2 static char vcid[] = "$Id: mpibaij.c,v 1.111 1998/03/04 16:07:09 balay Exp bsmith $"; 3 #endif 4 5 #include "pinclude/pviewer.h" /*I "mat.h" I*/ 6 #include "src/mat/impls/baij/mpi/mpibaij.h" 7 #include "src/vec/vecimpl.h" 8 9 10 extern int MatSetUpMultiply_MPIBAIJ(Mat); 11 extern int DisAssemble_MPIBAIJ(Mat); 12 extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int); 13 extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatGetSubMatrixCall,Mat **); 14 15 /* 16 Local utility routine that creates a mapping from the global column 17 number to the local number in the off-diagonal part of the local 18 storage of the matrix. This is done in a non scable way since the 19 length of colmap equals the global matrix length. 20 */ 21 #undef __FUNC__ 22 #define __FUNC__ "CreateColmap_MPIBAIJ_Private" 23 static int CreateColmap_MPIBAIJ_Private(Mat mat) 24 { 25 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 26 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data; 27 int nbs = B->nbs,i,bs=B->bs;; 28 29 PetscFunctionBegin; 30 baij->colmap = (int *) PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap); 31 PLogObjectMemory(mat,baij->Nbs*sizeof(int)); 32 PetscMemzero(baij->colmap,baij->Nbs*sizeof(int)); 33 for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1; 34 PetscFunctionReturn(0); 35 } 36 37 #define CHUNKSIZE 10 38 39 #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 40 { \ 41 \ 42 brow = row/bs; \ 43 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 44 rmax = aimax[brow]; nrow = ailen[brow]; \ 45 bcol = col/bs; \ 46 ridx = row % bs; cidx = col % bs; \ 47 low = 0; high = nrow; \ 48 while (high-low > 3) { \ 49 t = (low+high)/2; \ 50 if (rp[t] > bcol) high = t; \ 51 else low = t; \ 52 } \ 53 for ( _i=low; _i<high; _i++ ) { \ 54 if (rp[_i] > bcol) break; \ 55 if (rp[_i] == bcol) { \ 56 bap = ap + bs2*_i + bs*cidx + ridx; \ 57 if (addv == ADD_VALUES) *bap += value; \ 58 else *bap = value; \ 59 goto a_noinsert; \ 60 } \ 61 } \ 62 if (a->nonew == 1) goto a_noinsert; \ 63 else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 64 if (nrow >= rmax) { \ 65 /* there is no extra room in row, therefore enlarge */ \ 66 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 67 Scalar *new_a; \ 68 \ 69 if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 70 \ 71 /* malloc new storage space */ \ 72 len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); \ 73 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 74 new_j = (int *) (new_a + bs2*new_nz); \ 75 new_i = new_j + new_nz; \ 76 \ 77 /* copy over old data into new slots */ \ 78 for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} \ 79 for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 80 PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); \ 81 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \ 82 PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, \ 83 len*sizeof(int)); \ 84 PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); \ 85 PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \ 86 PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \ 87 aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar)); \ 88 /* free up old matrix storage */ \ 89 PetscFree(a->a); \ 90 if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \ 91 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 92 a->singlemalloc = 1; \ 93 \ 94 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 95 rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \ 96 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \ 97 a->maxnz += bs2*CHUNKSIZE; \ 98 a->reallocs++; \ 99 a->nz++; \ 100 } \ 101 N = nrow++ - 1; \ 102 /* shift up all the later entries in this row */ \ 103 for ( ii=N; ii>=_i; ii-- ) { \ 104 rp[ii+1] = rp[ii]; \ 105 PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \ 106 } \ 107 if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar)); \ 108 rp[_i] = bcol; \ 109 ap[bs2*_i + bs*cidx + ridx] = value; \ 110 a_noinsert:; \ 111 ailen[brow] = nrow; \ 112 } 113 114 #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 115 { \ 116 \ 117 brow = row/bs; \ 118 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 119 rmax = bimax[brow]; nrow = bilen[brow]; \ 120 bcol = col/bs; \ 121 ridx = row % bs; cidx = col % bs; \ 122 low = 0; high = nrow; \ 123 while (high-low > 3) { \ 124 t = (low+high)/2; \ 125 if (rp[t] > bcol) high = t; \ 126 else low = t; \ 127 } \ 128 for ( _i=low; _i<high; _i++ ) { \ 129 if (rp[_i] > bcol) break; \ 130 if (rp[_i] == bcol) { \ 131 bap = ap + bs2*_i + bs*cidx + ridx; \ 132 if (addv == ADD_VALUES) *bap += value; \ 133 else *bap = value; \ 134 goto b_noinsert; \ 135 } \ 136 } \ 137 if (b->nonew == 1) goto b_noinsert; \ 138 else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 139 if (nrow >= rmax) { \ 140 /* there is no extra room in row, therefore enlarge */ \ 141 int new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 142 Scalar *new_a; \ 143 \ 144 if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 145 \ 146 /* malloc new storage space */ \ 147 len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(b->mbs+1)*sizeof(int); \ 148 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 149 new_j = (int *) (new_a + bs2*new_nz); \ 150 new_i = new_j + new_nz; \ 151 \ 152 /* copy over old data into new slots */ \ 153 for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = bi[ii];} \ 154 for ( ii=brow+1; ii<b->mbs+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 155 PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int)); \ 156 len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \ 157 PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow, \ 158 len*sizeof(int)); \ 159 PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(Scalar)); \ 160 PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \ 161 PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \ 162 ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(Scalar)); \ 163 /* free up old matrix storage */ \ 164 PetscFree(b->a); \ 165 if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \ 166 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 167 b->singlemalloc = 1; \ 168 \ 169 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 170 rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \ 171 PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \ 172 b->maxnz += bs2*CHUNKSIZE; \ 173 b->reallocs++; \ 174 b->nz++; \ 175 } \ 176 N = nrow++ - 1; \ 177 /* shift up all the later entries in this row */ \ 178 for ( ii=N; ii>=_i; ii-- ) { \ 179 rp[ii+1] = rp[ii]; \ 180 PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \ 181 } \ 182 if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar)); \ 183 rp[_i] = bcol; \ 184 ap[bs2*_i + bs*cidx + ridx] = value; \ 185 b_noinsert:; \ 186 bilen[brow] = nrow; \ 187 } 188 189 #undef __FUNC__ 190 #define __FUNC__ "MatSetValues_MPIBAIJ" 191 int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 192 { 193 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 194 Scalar value; 195 int ierr,i,j,row,col; 196 int roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ; 197 int rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs; 198 int cend_orig=baij->cend_bs,bs=baij->bs; 199 200 /* Some Variables required in the macro */ 201 Mat A = baij->A; 202 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) (A)->data; 203 int *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 204 Scalar *aa=a->a; 205 206 Mat B = baij->B; 207 Mat_SeqBAIJ *b = (Mat_SeqBAIJ *) (B)->data; 208 int *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 209 Scalar *ba=b->a; 210 211 int *rp,ii,nrow,_i,rmax,N,brow,bcol; 212 int low,high,t,ridx,cidx,bs2=a->bs2; 213 Scalar *ap,*bap; 214 215 PetscFunctionBegin; 216 for ( i=0; i<m; i++ ) { 217 #if defined(USE_PETSC_BOPT_g) 218 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 219 if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 220 #endif 221 if (im[i] >= rstart_orig && im[i] < rend_orig) { 222 row = im[i] - rstart_orig; 223 for ( j=0; j<n; j++ ) { 224 if (in[j] >= cstart_orig && in[j] < cend_orig){ 225 col = in[j] - cstart_orig; 226 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 227 MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 228 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 229 } 230 #if defined(USE_PETSC_BOPT_g) 231 else if (in[j] < 0) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");} 232 else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Col too large");} 233 #endif 234 else { 235 if (mat->was_assembled) { 236 if (!baij->colmap) { 237 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 238 } 239 col = baij->colmap[in[j]/bs] - 1 + in[j]%bs; 240 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 241 ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 242 col = in[j]; 243 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 244 B = baij->B; 245 b = (Mat_SeqBAIJ *) (B)->data; 246 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 247 ba=b->a; 248 } 249 } else col = in[j]; 250 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 251 MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 252 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 253 } 254 } 255 } else { 256 if (roworiented && !baij->donotstash) { 257 ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 258 } else { 259 if (!baij->donotstash) { 260 row = im[i]; 261 for ( j=0; j<n; j++ ) { 262 ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 263 } 264 } 265 } 266 } 267 } 268 PetscFunctionReturn(0); 269 } 270 271 extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 272 #undef __FUNC__ 273 #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ" 274 int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 275 { 276 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 277 Scalar *value,*barray=baij->barray; 278 int ierr,i,j,ii,jj,row,col,k,l; 279 int roworiented = baij->roworiented,rstart=baij->rstart ; 280 int rend=baij->rend,cstart=baij->cstart,stepval; 281 int cend=baij->cend,bs=baij->bs,bs2=baij->bs2; 282 283 if(!barray) { 284 baij->barray = barray = (Scalar*) PetscMalloc(bs2*sizeof(Scalar)); CHKPTRQ(barray); 285 } 286 287 if (roworiented) { 288 stepval = (n-1)*bs; 289 } else { 290 stepval = (m-1)*bs; 291 } 292 for ( i=0; i<m; i++ ) { 293 #if defined(USE_PETSC_BOPT_g) 294 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 295 if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 296 #endif 297 if (im[i] >= rstart && im[i] < rend) { 298 row = im[i] - rstart; 299 for ( j=0; j<n; j++ ) { 300 /* If NumCol = 1 then a copy is not required */ 301 if ((roworiented) && (n == 1)) { 302 barray = v + i*bs2; 303 } else if((!roworiented) && (m == 1)) { 304 barray = v + j*bs2; 305 } else { /* Here a copy is required */ 306 if (roworiented) { 307 value = v + i*(stepval+bs)*bs + j*bs; 308 } else { 309 value = v + j*(stepval+bs)*bs + i*bs; 310 } 311 for ( ii=0; ii<bs; ii++,value+=stepval ) { 312 for (jj=0; jj<bs; jj++ ) { 313 *barray++ = *value++; 314 } 315 } 316 barray -=bs2; 317 } 318 319 if (in[j] >= cstart && in[j] < cend){ 320 col = in[j] - cstart; 321 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 322 } 323 #if defined(USE_PETSC_BOPT_g) 324 else if (in[j] < 0) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");} 325 else if (in[j] >= baij->Nbs) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");} 326 #endif 327 else { 328 if (mat->was_assembled) { 329 if (!baij->colmap) { 330 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 331 } 332 333 #if defined(USE_PETSC_BOPT_g) 334 if ((baij->colmap[in[j]] - 1) % bs) {SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");} 335 #endif 336 col = (baij->colmap[in[j]] - 1)/bs; 337 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 338 ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 339 col = in[j]; 340 } 341 } 342 else col = in[j]; 343 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 344 } 345 } 346 } else { 347 if (!baij->donotstash) { 348 if (roworiented ) { 349 row = im[i]*bs; 350 value = v + i*(stepval+bs)*bs; 351 for ( j=0; j<bs; j++,row++ ) { 352 for ( k=0; k<n; k++ ) { 353 for ( col=in[k]*bs,l=0; l<bs; l++,col++) { 354 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr); 355 } 356 } 357 } 358 } else { 359 for ( j=0; j<n; j++ ) { 360 value = v + j*(stepval+bs)*bs + i*bs; 361 col = in[j]*bs; 362 for ( k=0; k<bs; k++,col++,value+=stepval) { 363 for ( row = im[i]*bs,l=0; l<bs; l++,row++) { 364 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr); 365 } 366 } 367 } 368 } 369 } 370 } 371 } 372 PetscFunctionReturn(0); 373 } 374 #include <math.h> 375 #define HASH_KEY 0.6180339887 376 /* #define HASH1(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 377 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp))) 378 /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 379 #undef __FUNC__ 380 #define __FUNC__ "MatSetValues_MPIBAIJ_HT" 381 int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 382 { 383 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 384 int ierr,i,j,row,col; 385 int roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ; 386 int rend_orig=baij->rend_bs,Nbs=baij->Nbs; 387 int h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx; 388 double tmp; 389 Scalar ** HD = baij->hd,value; 390 #if defined(USE_PETSC_BOPT_g) 391 int total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 392 #endif 393 394 PetscFunctionBegin; 395 396 for ( i=0; i<m; i++ ) { 397 #if defined(USE_PETSC_BOPT_g) 398 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 399 if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 400 #endif 401 row = im[i]; 402 if (row >= rstart_orig && row < rend_orig) { 403 for ( j=0; j<n; j++ ) { 404 col = in[j]; 405 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 406 /* Look up into the Hash Table */ 407 key = (row/bs)*Nbs+(col/bs)+1; 408 h1 = HASH(size,key,tmp); 409 410 411 idx = h1; 412 #if defined(USE_PETSC_BOPT_g) 413 insert_ct++; 414 total_ct++; 415 if (HT[idx] != key) { 416 for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 417 if (idx == size) { 418 for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 419 if (idx == h1) { 420 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 421 } 422 } 423 } 424 #else 425 if (HT[idx] != key) { 426 for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++); 427 if (idx == size) { 428 for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++); 429 if (idx == h1) { 430 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 431 } 432 } 433 } 434 #endif 435 /* A HASH table entry is found, so insert the values at the correct address */ 436 if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 437 else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 438 } 439 } else { 440 if (roworiented && !baij->donotstash) { 441 ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 442 } else { 443 if (!baij->donotstash) { 444 row = im[i]; 445 for ( j=0; j<n; j++ ) { 446 ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 447 } 448 } 449 } 450 } 451 } 452 #if defined(USE_PETSC_BOPT_g) 453 baij->ht_total_ct = total_ct; 454 baij->ht_insert_ct = insert_ct; 455 #endif 456 PetscFunctionReturn(0); 457 } 458 459 #undef __FUNC__ 460 #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ_HT" 461 int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 462 { 463 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 464 int ierr,i,j,ii,jj,row,col,k,l; 465 int roworiented = baij->roworiented,rstart=baij->rstart ; 466 int rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2; 467 int h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 468 double tmp; 469 Scalar ** HD = baij->hd,*value,*v_t,*baij_a; 470 #if defined(USE_PETSC_BOPT_g) 471 int total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 472 #endif 473 474 PetscFunctionBegin; 475 476 if (roworiented) { 477 stepval = (n-1)*bs; 478 } else { 479 stepval = (m-1)*bs; 480 } 481 for ( i=0; i<m; i++ ) { 482 #if defined(USE_PETSC_BOPT_g) 483 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 484 if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 485 #endif 486 row = im[i]; 487 v_t = v + i*bs2; 488 if (row >= rstart && row < rend) { 489 for ( j=0; j<n; j++ ) { 490 col = in[j]; 491 492 /* Look up into the Hash Table */ 493 key = row*Nbs+col+1; 494 h1 = HASH(size,key,tmp); 495 496 idx = h1; 497 #if defined(USE_PETSC_BOPT_g) 498 total_ct++; 499 insert_ct++; 500 if (HT[idx] != key) { 501 for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 502 if (idx == size) { 503 for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 504 if (idx == h1) { 505 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 506 } 507 } 508 } 509 #else 510 if (HT[idx] != key) { 511 for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++); 512 if (idx == size) { 513 for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++); 514 if (idx == h1) { 515 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 516 } 517 } 518 } 519 #endif 520 baij_a = HD[idx]; 521 if (roworiented) { 522 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 523 /* value = v + (i*(stepval+bs)+j)*bs; */ 524 value = v_t; 525 v_t += bs; 526 if (addv == ADD_VALUES) { 527 for ( ii=0; ii<bs; ii++,value+=stepval) { 528 for ( jj=ii; jj<bs2; jj+=bs ) { 529 baij_a[jj] += *value++; 530 } 531 } 532 } else { 533 for ( ii=0; ii<bs; ii++,value+=stepval) { 534 for ( jj=ii; jj<bs2; jj+=bs ) { 535 baij_a[jj] = *value++; 536 } 537 } 538 } 539 } else { 540 value = v + j*(stepval+bs)*bs + i*bs; 541 if (addv == ADD_VALUES) { 542 for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) { 543 for ( jj=0; jj<bs; jj++ ) { 544 baij_a[jj] += *value++; 545 } 546 } 547 } else { 548 for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) { 549 for ( jj=0; jj<bs; jj++ ) { 550 baij_a[jj] = *value++; 551 } 552 } 553 } 554 } 555 } 556 } else { 557 if (!baij->donotstash) { 558 if (roworiented ) { 559 row = im[i]*bs; 560 value = v + i*(stepval+bs)*bs; 561 for ( j=0; j<bs; j++,row++ ) { 562 for ( k=0; k<n; k++ ) { 563 for ( col=in[k]*bs,l=0; l<bs; l++,col++) { 564 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr); 565 } 566 } 567 } 568 } else { 569 for ( j=0; j<n; j++ ) { 570 value = v + j*(stepval+bs)*bs + i*bs; 571 col = in[j]*bs; 572 for ( k=0; k<bs; k++,col++,value+=stepval) { 573 for ( row = im[i]*bs,l=0; l<bs; l++,row++) { 574 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr); 575 } 576 } 577 } 578 } 579 } 580 } 581 } 582 #if defined(USE_PETSC_BOPT_g) 583 baij->ht_total_ct = total_ct; 584 baij->ht_insert_ct = insert_ct; 585 #endif 586 PetscFunctionReturn(0); 587 } 588 589 #undef __FUNC__ 590 #define __FUNC__ "MatGetValues_MPIBAIJ" 591 int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 592 { 593 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 594 int bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs; 595 int bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col; 596 597 PetscFunctionBegin; 598 for ( i=0; i<m; i++ ) { 599 if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 600 if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 601 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 602 row = idxm[i] - bsrstart; 603 for ( j=0; j<n; j++ ) { 604 if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column"); 605 if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large"); 606 if (idxn[j] >= bscstart && idxn[j] < bscend){ 607 col = idxn[j] - bscstart; 608 ierr = MatGetValues(baij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 609 } else { 610 if (!baij->colmap) { 611 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 612 } 613 if((baij->colmap[idxn[j]/bs]-1 < 0) || 614 (baij->garray[(baij->colmap[idxn[j]/bs]-1)/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 615 else { 616 col = (baij->colmap[idxn[j]/bs]-1) + idxn[j]%bs; 617 ierr = MatGetValues(baij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 618 } 619 } 620 } 621 } else { 622 SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported"); 623 } 624 } 625 PetscFunctionReturn(0); 626 } 627 628 #undef __FUNC__ 629 #define __FUNC__ "MatNorm_MPIBAIJ" 630 int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm) 631 { 632 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 633 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data; 634 int ierr, i,bs2=baij->bs2; 635 double sum = 0.0; 636 Scalar *v; 637 638 PetscFunctionBegin; 639 if (baij->size == 1) { 640 ierr = MatNorm(baij->A,type,norm); CHKERRQ(ierr); 641 } else { 642 if (type == NORM_FROBENIUS) { 643 v = amat->a; 644 for (i=0; i<amat->nz*bs2; i++ ) { 645 #if defined(USE_PETSC_COMPLEX) 646 sum += real(conj(*v)*(*v)); v++; 647 #else 648 sum += (*v)*(*v); v++; 649 #endif 650 } 651 v = bmat->a; 652 for (i=0; i<bmat->nz*bs2; i++ ) { 653 #if defined(USE_PETSC_COMPLEX) 654 sum += real(conj(*v)*(*v)); v++; 655 #else 656 sum += (*v)*(*v); v++; 657 #endif 658 } 659 ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 660 *norm = sqrt(*norm); 661 } else { 662 SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet"); 663 } 664 } 665 PetscFunctionReturn(0); 666 } 667 668 #undef __FUNC__ 669 #define __FUNC__ "MatAssemblyBegin_MPIBAIJ" 670 int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 671 { 672 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 673 MPI_Comm comm = mat->comm; 674 int size = baij->size, *owners = baij->rowners,bs=baij->bs; 675 int rank = baij->rank,tag = mat->tag, *owner,*starts,count,ierr; 676 MPI_Request *send_waits,*recv_waits; 677 int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 678 InsertMode addv; 679 Scalar *rvalues,*svalues; 680 681 PetscFunctionBegin; 682 /* make sure all processors are either in INSERTMODE or ADDMODE */ 683 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr); 684 if (addv == (ADD_VALUES|INSERT_VALUES)) { 685 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added"); 686 } 687 mat->insertmode = addv; /* in case this processor had no cache */ 688 689 /* first count number of contributors to each processor */ 690 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 691 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 692 owner = (int *) PetscMalloc( (baij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 693 for ( i=0; i<baij->stash.n; i++ ) { 694 idx = baij->stash.idx[i]; 695 for ( j=0; j<size; j++ ) { 696 if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 697 nprocs[j]++; procs[j] = 1; owner[i] = j; break; 698 } 699 } 700 } 701 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 702 703 /* inform other processors of number of messages and max length*/ 704 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 705 ierr = MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 706 nreceives = work[rank]; 707 ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 708 nmax = work[rank]; 709 PetscFree(work); 710 711 /* post receives: 712 1) each message will consist of ordered pairs 713 (global index,value) we store the global index as a double 714 to simplify the message passing. 715 2) since we don't know how long each individual message is we 716 allocate the largest needed buffer for each receive. Potentially 717 this is a lot of wasted space. 718 719 720 This could be done better. 721 */ 722 rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));CHKPTRQ(rvalues); 723 recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 724 for ( i=0; i<nreceives; i++ ) { 725 ierr = MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 726 } 727 728 /* do sends: 729 1) starts[i] gives the starting index in svalues for stuff going to 730 the ith processor 731 */ 732 svalues = (Scalar *) PetscMalloc(3*(baij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 733 send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 734 starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 735 starts[0] = 0; 736 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 737 for ( i=0; i<baij->stash.n; i++ ) { 738 svalues[3*starts[owner[i]]] = (Scalar) baij->stash.idx[i]; 739 svalues[3*starts[owner[i]]+1] = (Scalar) baij->stash.idy[i]; 740 svalues[3*(starts[owner[i]]++)+2] = baij->stash.array[i]; 741 } 742 PetscFree(owner); 743 starts[0] = 0; 744 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 745 count = 0; 746 for ( i=0; i<size; i++ ) { 747 if (procs[i]) { 748 ierr = MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 749 } 750 } 751 PetscFree(starts); PetscFree(nprocs); 752 753 /* Free cache space */ 754 PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Number of off-processor values %d\n",baij->stash.n); 755 ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr); 756 757 baij->svalues = svalues; baij->rvalues = rvalues; 758 baij->nsends = nsends; baij->nrecvs = nreceives; 759 baij->send_waits = send_waits; baij->recv_waits = recv_waits; 760 baij->rmax = nmax; 761 762 PetscFunctionReturn(0); 763 } 764 765 /* 766 Creates the hash table, and sets the table 767 This table is created only once. 768 If new entried need to be added to the matrix 769 then the hash table has to be destroyed and 770 recreated. 771 */ 772 #undef __FUNC__ 773 #define __FUNC__ "MatCreateHashTable_MPIBAIJ_Private" 774 int MatCreateHashTable_MPIBAIJ_Private(Mat mat,double factor) 775 { 776 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 777 Mat A = baij->A, B=baij->B; 778 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data, *b=(Mat_SeqBAIJ *)B->data; 779 int i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 780 int size,bs2=baij->bs2,rstart=baij->rstart; 781 int cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs; 782 int *HT,key; 783 Scalar **HD; 784 double tmp; 785 #if defined(USE_PETSC_BOPT_g) 786 int ct=0,max=0; 787 #endif 788 789 PetscFunctionBegin; 790 baij->ht_size=(int)(factor*nz); 791 size = baij->ht_size; 792 793 if (baij->ht) { 794 PetscFunctionReturn(0); 795 } 796 797 /* Allocate Memory for Hash Table */ 798 baij->hd = (Scalar**)PetscMalloc((size)*(sizeof(int)+sizeof(Scalar*))+1); CHKPTRQ(baij->hd); 799 baij->ht = (int*)(baij->hd + size); 800 HD = baij->hd; 801 HT = baij->ht; 802 803 804 PetscMemzero(HD,size*(sizeof(int)+sizeof(Scalar*))); 805 806 807 /* Loop Over A */ 808 for ( i=0; i<a->mbs; i++ ) { 809 for ( j=ai[i]; j<ai[i+1]; j++ ) { 810 row = i+rstart; 811 col = aj[j]+cstart; 812 813 key = row*Nbs + col + 1; 814 h1 = HASH(size,key,tmp); 815 for ( k=0; k<size; k++ ){ 816 if (HT[(h1+k)%size] == 0.0) { 817 HT[(h1+k)%size] = key; 818 HD[(h1+k)%size] = a->a + j*bs2; 819 break; 820 #if defined(USE_PETSC_BOPT_g) 821 } else { 822 ct++; 823 #endif 824 } 825 } 826 #if defined(USE_PETSC_BOPT_g) 827 if (k> max) max = k; 828 #endif 829 } 830 } 831 /* Loop Over B */ 832 for ( i=0; i<b->mbs; i++ ) { 833 for ( j=bi[i]; j<bi[i+1]; j++ ) { 834 row = i+rstart; 835 col = garray[bj[j]]; 836 key = row*Nbs + col + 1; 837 h1 = HASH(size,key,tmp); 838 for ( k=0; k<size; k++ ){ 839 if (HT[(h1+k)%size] == 0.0) { 840 HT[(h1+k)%size] = key; 841 HD[(h1+k)%size] = b->a + j*bs2; 842 break; 843 #if defined(USE_PETSC_BOPT_g) 844 } else { 845 ct++; 846 #endif 847 } 848 } 849 #if defined(USE_PETSC_BOPT_g) 850 if (k> max) max = k; 851 #endif 852 } 853 } 854 855 /* Print Summary */ 856 #if defined(USE_PETSC_BOPT_g) 857 for ( i=0,j=0; i<size; i++) 858 if (HT[i]) {j++;} 859 PLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n", 860 (j== 0)? 0.0:((double)(ct+j))/j,max); 861 #endif 862 PetscFunctionReturn(0); 863 } 864 865 #undef __FUNC__ 866 #define __FUNC__ "MatAssemblyEnd_MPIBAIJ" 867 int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 868 { 869 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 870 MPI_Status *send_status,recv_status; 871 int imdex,nrecvs = baij->nrecvs, count = nrecvs, i, n, ierr; 872 int bs=baij->bs,row,col,other_disassembled; 873 Scalar *values,val; 874 InsertMode addv = mat->insertmode; 875 876 PetscFunctionBegin; 877 /* wait on receives */ 878 while (count) { 879 ierr = MPI_Waitany(nrecvs,baij->recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 880 /* unpack receives into our local space */ 881 values = baij->rvalues + 3*imdex*baij->rmax; 882 ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,&n);CHKERRQ(ierr); 883 n = n/3; 884 for ( i=0; i<n; i++ ) { 885 row = (int) PetscReal(values[3*i]) - baij->rstart*bs; 886 col = (int) PetscReal(values[3*i+1]); 887 val = values[3*i+2]; 888 if (col >= baij->cstart*bs && col < baij->cend*bs) { 889 col -= baij->cstart*bs; 890 ierr = MatSetValues(baij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr) 891 } else { 892 if (mat->was_assembled) { 893 if (!baij->colmap) { 894 ierr = CreateColmap_MPIBAIJ_Private(mat); CHKERRQ(ierr); 895 } 896 col = (baij->colmap[col/bs]) - 1 + col%bs; 897 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 898 ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 899 col = (int) PetscReal(values[3*i+1]); 900 } 901 } 902 ierr = MatSetValues(baij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr) 903 } 904 } 905 count--; 906 } 907 PetscFree(baij->recv_waits); PetscFree(baij->rvalues); 908 909 /* wait on sends */ 910 if (baij->nsends) { 911 send_status = (MPI_Status *) PetscMalloc(baij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 912 ierr = MPI_Waitall(baij->nsends,baij->send_waits,send_status);CHKERRQ(ierr); 913 PetscFree(send_status); 914 } 915 PetscFree(baij->send_waits); PetscFree(baij->svalues); 916 917 ierr = MatAssemblyBegin(baij->A,mode); CHKERRQ(ierr); 918 ierr = MatAssemblyEnd(baij->A,mode); CHKERRQ(ierr); 919 920 /* determine if any processor has disassembled, if so we must 921 also disassemble ourselfs, in order that we may reassemble. */ 922 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 923 if (mat->was_assembled && !other_disassembled) { 924 ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 925 } 926 927 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 928 ierr = MatSetUpMultiply_MPIBAIJ(mat); CHKERRQ(ierr); 929 } 930 ierr = MatAssemblyBegin(baij->B,mode); CHKERRQ(ierr); 931 ierr = MatAssemblyEnd(baij->B,mode); CHKERRQ(ierr); 932 933 #if defined(USE_PETSC_BOPT_g) 934 if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 935 PLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n", 936 ((double)baij->ht_total_ct)/baij->ht_insert_ct); 937 baij->ht_total_ct = 0; 938 baij->ht_insert_ct = 0; 939 } 940 #endif 941 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 942 ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact); CHKERRQ(ierr); 943 mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 944 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 945 } 946 947 if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;} 948 PetscFunctionReturn(0); 949 } 950 951 #undef __FUNC__ 952 #define __FUNC__ "MatView_MPIBAIJ_Binary" 953 static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer) 954 { 955 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 956 int ierr; 957 958 PetscFunctionBegin; 959 if (baij->size == 1) { 960 ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 961 } else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported"); 962 PetscFunctionReturn(0); 963 } 964 965 #undef __FUNC__ 966 #define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworMatlab" 967 static int MatView_MPIBAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer) 968 { 969 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 970 int ierr, format,rank,bs = baij->bs; 971 FILE *fd; 972 ViewerType vtype; 973 974 PetscFunctionBegin; 975 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 976 if (vtype == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) { 977 ierr = ViewerGetFormat(viewer,&format); 978 if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 979 MatInfo info; 980 MPI_Comm_rank(mat->comm,&rank); 981 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 982 ierr = MatGetInfo(mat,MAT_LOCAL,&info); 983 PetscSequentialPhaseBegin(mat->comm,1); 984 fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n", 985 rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs, 986 baij->bs,(int)info.memory); 987 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info); 988 fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs); 989 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info); 990 fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs); 991 fflush(fd); 992 PetscSequentialPhaseEnd(mat->comm,1); 993 ierr = VecScatterView(baij->Mvctx,viewer); CHKERRQ(ierr); 994 PetscFunctionReturn(0); 995 } else if (format == VIEWER_FORMAT_ASCII_INFO) { 996 PetscPrintf(mat->comm," block size is %d\n",bs); 997 PetscFunctionReturn(0); 998 } 999 } 1000 1001 if (vtype == DRAW_VIEWER) { 1002 Draw draw; 1003 PetscTruth isnull; 1004 ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr); 1005 ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1006 } 1007 1008 if (vtype == ASCII_FILE_VIEWER) { 1009 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 1010 PetscSequentialPhaseBegin(mat->comm,1); 1011 fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 1012 baij->rank,baij->m,baij->rstart*bs,baij->rend*bs,baij->n, 1013 baij->cstart*bs,baij->cend*bs); 1014 ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 1015 ierr = MatView(baij->B,viewer); CHKERRQ(ierr); 1016 fflush(fd); 1017 PetscSequentialPhaseEnd(mat->comm,1); 1018 } else { 1019 int size = baij->size; 1020 rank = baij->rank; 1021 if (size == 1) { 1022 ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 1023 } else { 1024 /* assemble the entire matrix onto first processor. */ 1025 Mat A; 1026 Mat_SeqBAIJ *Aloc; 1027 int M = baij->M, N = baij->N,*ai,*aj,row,col,i,j,k,*rvals; 1028 int mbs=baij->mbs; 1029 Scalar *a; 1030 1031 if (!rank) { 1032 ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 1033 } else { 1034 ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 1035 } 1036 PLogObjectParent(mat,A); 1037 1038 /* copy over the A part */ 1039 Aloc = (Mat_SeqBAIJ*) baij->A->data; 1040 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1041 row = baij->rstart; 1042 rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals); 1043 1044 for ( i=0; i<mbs; i++ ) { 1045 rvals[0] = bs*(baij->rstart + i); 1046 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 1047 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1048 col = (baij->cstart+aj[j])*bs; 1049 for (k=0; k<bs; k++ ) { 1050 ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1051 col++; a += bs; 1052 } 1053 } 1054 } 1055 /* copy over the B part */ 1056 Aloc = (Mat_SeqBAIJ*) baij->B->data; 1057 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1058 row = baij->rstart*bs; 1059 for ( i=0; i<mbs; i++ ) { 1060 rvals[0] = bs*(baij->rstart + i); 1061 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 1062 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1063 col = baij->garray[aj[j]]*bs; 1064 for (k=0; k<bs; k++ ) { 1065 ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1066 col++; a += bs; 1067 } 1068 } 1069 } 1070 PetscFree(rvals); 1071 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1072 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1073 /* 1074 Everyone has to call to draw the matrix since the graphics waits are 1075 synchronized across all processors that share the Draw object 1076 */ 1077 if (!rank || vtype == DRAW_VIEWER) { 1078 ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 1079 } 1080 ierr = MatDestroy(A); CHKERRQ(ierr); 1081 } 1082 } 1083 PetscFunctionReturn(0); 1084 } 1085 1086 1087 1088 #undef __FUNC__ 1089 #define __FUNC__ "MatView_MPIBAIJ" 1090 int MatView_MPIBAIJ(PetscObject obj,Viewer viewer) 1091 { 1092 Mat mat = (Mat) obj; 1093 int ierr; 1094 ViewerType vtype; 1095 1096 PetscFunctionBegin; 1097 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 1098 if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER || 1099 vtype == DRAW_VIEWER || vtype == MATLAB_VIEWER) { 1100 ierr = MatView_MPIBAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 1101 } else if (vtype == BINARY_FILE_VIEWER) { 1102 ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1103 } 1104 PetscFunctionReturn(0); 1105 } 1106 1107 #undef __FUNC__ 1108 #define __FUNC__ "MatDestroy_MPIBAIJ" 1109 int MatDestroy_MPIBAIJ(PetscObject obj) 1110 { 1111 Mat mat = (Mat) obj; 1112 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1113 int ierr; 1114 1115 PetscFunctionBegin; 1116 #if defined(USE_PETSC_LOG) 1117 PLogObjectState(obj,"Rows=%d, Cols=%d",baij->M,baij->N); 1118 #endif 1119 1120 ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr); 1121 PetscFree(baij->rowners); 1122 ierr = MatDestroy(baij->A); CHKERRQ(ierr); 1123 ierr = MatDestroy(baij->B); CHKERRQ(ierr); 1124 if (baij->colmap) PetscFree(baij->colmap); 1125 if (baij->garray) PetscFree(baij->garray); 1126 if (baij->lvec) VecDestroy(baij->lvec); 1127 if (baij->Mvctx) VecScatterDestroy(baij->Mvctx); 1128 if (baij->rowvalues) PetscFree(baij->rowvalues); 1129 if (baij->barray) PetscFree(baij->barray); 1130 if (baij->hd) PetscFree(baij->hd); 1131 PetscFree(baij); 1132 PLogObjectDestroy(mat); 1133 PetscHeaderDestroy(mat); 1134 PetscFunctionReturn(0); 1135 } 1136 1137 #undef __FUNC__ 1138 #define __FUNC__ "MatMult_MPIBAIJ" 1139 int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1140 { 1141 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1142 int ierr, nt; 1143 1144 PetscFunctionBegin; 1145 VecGetLocalSize_Fast(xx,nt); 1146 if (nt != a->n) { 1147 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx"); 1148 } 1149 VecGetLocalSize_Fast(yy,nt); 1150 if (nt != a->m) { 1151 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible parition of A and yy"); 1152 } 1153 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1154 ierr = (*a->A->ops->mult)(a->A,xx,yy); CHKERRQ(ierr); 1155 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1156 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 1157 ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1158 PetscFunctionReturn(0); 1159 } 1160 1161 #undef __FUNC__ 1162 #define __FUNC__ "MatMultAdd_MPIBAIJ" 1163 int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1164 { 1165 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1166 int ierr; 1167 1168 PetscFunctionBegin; 1169 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1170 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 1171 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1172 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 1173 PetscFunctionReturn(0); 1174 } 1175 1176 #undef __FUNC__ 1177 #define __FUNC__ "MatMultTrans_MPIBAIJ" 1178 int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy) 1179 { 1180 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1181 int ierr; 1182 1183 PetscFunctionBegin; 1184 /* do nondiagonal part */ 1185 ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 1186 /* send it on its way */ 1187 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1188 /* do local part */ 1189 ierr = (*a->A->ops->multtrans)(a->A,xx,yy); CHKERRQ(ierr); 1190 /* receive remote parts: note this assumes the values are not actually */ 1191 /* inserted in yy until the next line, which is true for my implementation*/ 1192 /* but is not perhaps always true. */ 1193 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1194 PetscFunctionReturn(0); 1195 } 1196 1197 #undef __FUNC__ 1198 #define __FUNC__ "MatMultTransAdd_MPIBAIJ" 1199 int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1200 { 1201 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1202 int ierr; 1203 1204 PetscFunctionBegin; 1205 /* do nondiagonal part */ 1206 ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 1207 /* send it on its way */ 1208 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 1209 /* do local part */ 1210 ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 1211 /* receive remote parts: note this assumes the values are not actually */ 1212 /* inserted in yy until the next line, which is true for my implementation*/ 1213 /* but is not perhaps always true. */ 1214 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 1215 PetscFunctionReturn(0); 1216 } 1217 1218 /* 1219 This only works correctly for square matrices where the subblock A->A is the 1220 diagonal block 1221 */ 1222 #undef __FUNC__ 1223 #define __FUNC__ "MatGetDiagonal_MPIBAIJ" 1224 int MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1225 { 1226 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1227 int ierr; 1228 1229 PetscFunctionBegin; 1230 if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block"); 1231 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1232 PetscFunctionReturn(0); 1233 } 1234 1235 #undef __FUNC__ 1236 #define __FUNC__ "MatScale_MPIBAIJ" 1237 int MatScale_MPIBAIJ(Scalar *aa,Mat A) 1238 { 1239 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1240 int ierr; 1241 1242 PetscFunctionBegin; 1243 ierr = MatScale(aa,a->A); CHKERRQ(ierr); 1244 ierr = MatScale(aa,a->B); CHKERRQ(ierr); 1245 PetscFunctionReturn(0); 1246 } 1247 1248 #undef __FUNC__ 1249 #define __FUNC__ "MatGetSize_MPIBAIJ" 1250 int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n) 1251 { 1252 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1253 1254 PetscFunctionBegin; 1255 if (m) *m = mat->M; 1256 if (n) *n = mat->N; 1257 PetscFunctionReturn(0); 1258 } 1259 1260 #undef __FUNC__ 1261 #define __FUNC__ "MatGetLocalSize_MPIBAIJ" 1262 int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n) 1263 { 1264 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1265 1266 PetscFunctionBegin; 1267 *m = mat->m; *n = mat->n; 1268 PetscFunctionReturn(0); 1269 } 1270 1271 #undef __FUNC__ 1272 #define __FUNC__ "MatGetOwnershipRange_MPIBAIJ" 1273 int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n) 1274 { 1275 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1276 1277 PetscFunctionBegin; 1278 *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs; 1279 PetscFunctionReturn(0); 1280 } 1281 1282 extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 1283 extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 1284 1285 #undef __FUNC__ 1286 #define __FUNC__ "MatGetRow_MPIBAIJ" 1287 int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 1288 { 1289 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1290 Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 1291 int bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB; 1292 int nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs; 1293 int *cmap, *idx_p,cstart = mat->cstart; 1294 1295 PetscFunctionBegin; 1296 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active"); 1297 mat->getrowactive = PETSC_TRUE; 1298 1299 if (!mat->rowvalues && (idx || v)) { 1300 /* 1301 allocate enough space to hold information from the longest row. 1302 */ 1303 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data; 1304 int max = 1,mbs = mat->mbs,tmp; 1305 for ( i=0; i<mbs; i++ ) { 1306 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1307 if (max < tmp) { max = tmp; } 1308 } 1309 mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar))); 1310 CHKPTRQ(mat->rowvalues); 1311 mat->rowindices = (int *) (mat->rowvalues + max*bs2); 1312 } 1313 1314 if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,0,"Only local rows") 1315 lrow = row - brstart; 1316 1317 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1318 if (!v) {pvA = 0; pvB = 0;} 1319 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1320 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1321 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1322 nztot = nzA + nzB; 1323 1324 cmap = mat->garray; 1325 if (v || idx) { 1326 if (nztot) { 1327 /* Sort by increasing column numbers, assuming A and B already sorted */ 1328 int imark = -1; 1329 if (v) { 1330 *v = v_p = mat->rowvalues; 1331 for ( i=0; i<nzB; i++ ) { 1332 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1333 else break; 1334 } 1335 imark = i; 1336 for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i]; 1337 for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = vworkB[i]; 1338 } 1339 if (idx) { 1340 *idx = idx_p = mat->rowindices; 1341 if (imark > -1) { 1342 for ( i=0; i<imark; i++ ) { 1343 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1344 } 1345 } else { 1346 for ( i=0; i<nzB; i++ ) { 1347 if (cmap[cworkB[i]/bs] < cstart) 1348 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1349 else break; 1350 } 1351 imark = i; 1352 } 1353 for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart*bs + cworkA[i]; 1354 for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1355 } 1356 } else { 1357 if (idx) *idx = 0; 1358 if (v) *v = 0; 1359 } 1360 } 1361 *nz = nztot; 1362 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1363 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1364 PetscFunctionReturn(0); 1365 } 1366 1367 #undef __FUNC__ 1368 #define __FUNC__ "MatRestoreRow_MPIBAIJ" 1369 int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1370 { 1371 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1372 1373 PetscFunctionBegin; 1374 if (baij->getrowactive == PETSC_FALSE) { 1375 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called"); 1376 } 1377 baij->getrowactive = PETSC_FALSE; 1378 PetscFunctionReturn(0); 1379 } 1380 1381 #undef __FUNC__ 1382 #define __FUNC__ "MatGetBlockSize_MPIBAIJ" 1383 int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs) 1384 { 1385 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1386 1387 PetscFunctionBegin; 1388 *bs = baij->bs; 1389 PetscFunctionReturn(0); 1390 } 1391 1392 #undef __FUNC__ 1393 #define __FUNC__ "MatZeroEntries_MPIBAIJ" 1394 int MatZeroEntries_MPIBAIJ(Mat A) 1395 { 1396 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 1397 int ierr; 1398 1399 PetscFunctionBegin; 1400 ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 1401 ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 1402 PetscFunctionReturn(0); 1403 } 1404 1405 #undef __FUNC__ 1406 #define __FUNC__ "MatGetInfo_MPIBAIJ" 1407 int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1408 { 1409 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) matin->data; 1410 Mat A = a->A, B = a->B; 1411 int ierr; 1412 double isend[5], irecv[5]; 1413 1414 PetscFunctionBegin; 1415 info->block_size = (double)a->bs; 1416 ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr); 1417 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->memory; 1418 ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr); 1419 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->memory; 1420 if (flag == MAT_LOCAL) { 1421 info->nz_used = isend[0]; 1422 info->nz_allocated = isend[1]; 1423 info->nz_unneeded = isend[2]; 1424 info->memory = isend[3]; 1425 info->mallocs = isend[4]; 1426 } else if (flag == MAT_GLOBAL_MAX) { 1427 ierr = MPI_Allreduce(isend,irecv,5,MPI_INT,MPI_MAX,matin->comm);CHKERRQ(ierr); 1428 info->nz_used = irecv[0]; 1429 info->nz_allocated = irecv[1]; 1430 info->nz_unneeded = irecv[2]; 1431 info->memory = irecv[3]; 1432 info->mallocs = irecv[4]; 1433 } else if (flag == MAT_GLOBAL_SUM) { 1434 ierr = MPI_Allreduce(isend,irecv,5,MPI_INT,MPI_SUM,matin->comm);CHKERRQ(ierr); 1435 info->nz_used = irecv[0]; 1436 info->nz_allocated = irecv[1]; 1437 info->nz_unneeded = irecv[2]; 1438 info->memory = irecv[3]; 1439 info->mallocs = irecv[4]; 1440 } 1441 info->rows_global = (double)a->M; 1442 info->columns_global = (double)a->N; 1443 info->rows_local = (double)a->m; 1444 info->columns_local = (double)a->N; 1445 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1446 info->fill_ratio_needed = 0; 1447 info->factor_mallocs = 0; 1448 PetscFunctionReturn(0); 1449 } 1450 1451 #undef __FUNC__ 1452 #define __FUNC__ "MatSetOption_MPIBAIJ" 1453 int MatSetOption_MPIBAIJ(Mat A,MatOption op) 1454 { 1455 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1456 1457 PetscFunctionBegin; 1458 if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 1459 op == MAT_YES_NEW_NONZERO_LOCATIONS || 1460 op == MAT_COLUMNS_UNSORTED || 1461 op == MAT_COLUMNS_SORTED || 1462 op == MAT_NEW_NONZERO_ALLOCATION_ERROR || 1463 op == MAT_NEW_NONZERO_LOCATION_ERROR) { 1464 MatSetOption(a->A,op); 1465 MatSetOption(a->B,op); 1466 } else if (op == MAT_ROW_ORIENTED) { 1467 a->roworiented = 1; 1468 MatSetOption(a->A,op); 1469 MatSetOption(a->B,op); 1470 } else if (op == MAT_ROWS_SORTED || 1471 op == MAT_ROWS_UNSORTED || 1472 op == MAT_SYMMETRIC || 1473 op == MAT_STRUCTURALLY_SYMMETRIC || 1474 op == MAT_YES_NEW_DIAGONALS || 1475 op == MAT_USE_HASH_TABLE) 1476 PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 1477 else if (op == MAT_COLUMN_ORIENTED) { 1478 a->roworiented = 0; 1479 MatSetOption(a->A,op); 1480 MatSetOption(a->B,op); 1481 } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 1482 a->donotstash = 1; 1483 } else if (op == MAT_NO_NEW_DIAGONALS) { 1484 SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 1485 } else if (op == MAT_USE_HASH_TABLE) { 1486 a->ht_flag = 1; 1487 } else { 1488 SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 1489 } 1490 PetscFunctionReturn(0); 1491 } 1492 1493 #undef __FUNC__ 1494 #define __FUNC__ "MatTranspose_MPIBAIJ(" 1495 int MatTranspose_MPIBAIJ(Mat A,Mat *matout) 1496 { 1497 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data; 1498 Mat_SeqBAIJ *Aloc; 1499 Mat B; 1500 int ierr,M=baij->M,N=baij->N,*ai,*aj,row,i,*rvals,j,k,col; 1501 int bs=baij->bs,mbs=baij->mbs; 1502 Scalar *a; 1503 1504 PetscFunctionBegin; 1505 if (matout == PETSC_NULL && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place"); 1506 ierr = MatCreateMPIBAIJ(A->comm,baij->bs,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,PETSC_NULL,&B); 1507 CHKERRQ(ierr); 1508 1509 /* copy over the A part */ 1510 Aloc = (Mat_SeqBAIJ*) baij->A->data; 1511 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1512 row = baij->rstart; 1513 rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals); 1514 1515 for ( i=0; i<mbs; i++ ) { 1516 rvals[0] = bs*(baij->rstart + i); 1517 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 1518 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1519 col = (baij->cstart+aj[j])*bs; 1520 for (k=0; k<bs; k++ ) { 1521 ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1522 col++; a += bs; 1523 } 1524 } 1525 } 1526 /* copy over the B part */ 1527 Aloc = (Mat_SeqBAIJ*) baij->B->data; 1528 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1529 row = baij->rstart*bs; 1530 for ( i=0; i<mbs; i++ ) { 1531 rvals[0] = bs*(baij->rstart + i); 1532 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 1533 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1534 col = baij->garray[aj[j]]*bs; 1535 for (k=0; k<bs; k++ ) { 1536 ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1537 col++; a += bs; 1538 } 1539 } 1540 } 1541 PetscFree(rvals); 1542 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1543 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1544 1545 if (matout != PETSC_NULL) { 1546 *matout = B; 1547 } else { 1548 PetscOps *Abops; 1549 struct _MatOps *Aops; 1550 1551 /* This isn't really an in-place transpose .... but free data structures from baij */ 1552 PetscFree(baij->rowners); 1553 ierr = MatDestroy(baij->A); CHKERRQ(ierr); 1554 ierr = MatDestroy(baij->B); CHKERRQ(ierr); 1555 if (baij->colmap) PetscFree(baij->colmap); 1556 if (baij->garray) PetscFree(baij->garray); 1557 if (baij->lvec) VecDestroy(baij->lvec); 1558 if (baij->Mvctx) VecScatterDestroy(baij->Mvctx); 1559 PetscFree(baij); 1560 1561 /* 1562 This is horrible, horrible code. We need to keep the 1563 A pointers for the bops and ops but copy everything 1564 else from C. 1565 */ 1566 Abops = A->bops; 1567 Aops = A->ops; 1568 PetscMemcpy(A,B,sizeof(struct _p_Mat)); 1569 A->bops = Abops; 1570 A->ops = Aops; 1571 1572 PetscHeaderDestroy(B); 1573 } 1574 PetscFunctionReturn(0); 1575 } 1576 1577 #undef __FUNC__ 1578 #define __FUNC__ "MatDiagonalScale_MPIBAIJ" 1579 int MatDiagonalScale_MPIBAIJ(Mat A,Vec ll,Vec rr) 1580 { 1581 Mat a = ((Mat_MPIBAIJ *) A->data)->A; 1582 Mat b = ((Mat_MPIBAIJ *) A->data)->B; 1583 int ierr,s1,s2,s3; 1584 1585 PetscFunctionBegin; 1586 if (ll) { 1587 ierr = VecGetLocalSize(ll,&s1); CHKERRQ(ierr); 1588 ierr = MatGetLocalSize(A,&s2,&s3); CHKERRQ(ierr); 1589 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"non-conforming local sizes"); 1590 ierr = MatDiagonalScale(a,ll,0); CHKERRQ(ierr); 1591 ierr = MatDiagonalScale(b,ll,0); CHKERRQ(ierr); 1592 } 1593 if (rr) SETERRQ(PETSC_ERR_SUP,0,"not supported for right vector"); 1594 PetscFunctionReturn(0); 1595 } 1596 1597 #undef __FUNC__ 1598 #define __FUNC__ "MatZeroRows_MPIBAIJ" 1599 int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag) 1600 { 1601 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 1602 int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 1603 int *procs,*nprocs,j,found,idx,nsends,*work,row; 1604 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 1605 int *rvalues,tag = A->tag,count,base,slen,n,*source; 1606 int *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs; 1607 MPI_Comm comm = A->comm; 1608 MPI_Request *send_waits,*recv_waits; 1609 MPI_Status recv_status,*send_status; 1610 IS istmp; 1611 1612 PetscFunctionBegin; 1613 ierr = ISGetSize(is,&N); CHKERRQ(ierr); 1614 ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 1615 1616 /* first count number of contributors to each processor */ 1617 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 1618 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 1619 owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 1620 for ( i=0; i<N; i++ ) { 1621 idx = rows[i]; 1622 found = 0; 1623 for ( j=0; j<size; j++ ) { 1624 if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 1625 nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 1626 } 1627 } 1628 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range"); 1629 } 1630 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 1631 1632 /* inform other processors of number of messages and max length*/ 1633 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 1634 ierr = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 1635 nrecvs = work[rank]; 1636 ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 1637 nmax = work[rank]; 1638 PetscFree(work); 1639 1640 /* post receives: */ 1641 rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); CHKPTRQ(rvalues); 1642 recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 1643 for ( i=0; i<nrecvs; i++ ) { 1644 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 1645 } 1646 1647 /* do sends: 1648 1) starts[i] gives the starting index in svalues for stuff going to 1649 the ith processor 1650 */ 1651 svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 1652 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 1653 starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 1654 starts[0] = 0; 1655 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 1656 for ( i=0; i<N; i++ ) { 1657 svalues[starts[owner[i]]++] = rows[i]; 1658 } 1659 ISRestoreIndices(is,&rows); 1660 1661 starts[0] = 0; 1662 for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 1663 count = 0; 1664 for ( i=0; i<size; i++ ) { 1665 if (procs[i]) { 1666 ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1667 } 1668 } 1669 PetscFree(starts); 1670 1671 base = owners[rank]*bs; 1672 1673 /* wait on receives */ 1674 lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 1675 source = lens + nrecvs; 1676 count = nrecvs; slen = 0; 1677 while (count) { 1678 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1679 /* unpack receives into our local space */ 1680 ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 1681 source[imdex] = recv_status.MPI_SOURCE; 1682 lens[imdex] = n; 1683 slen += n; 1684 count--; 1685 } 1686 PetscFree(recv_waits); 1687 1688 /* move the data into the send scatter */ 1689 lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 1690 count = 0; 1691 for ( i=0; i<nrecvs; i++ ) { 1692 values = rvalues + i*nmax; 1693 for ( j=0; j<lens[i]; j++ ) { 1694 lrows[count++] = values[j] - base; 1695 } 1696 } 1697 PetscFree(rvalues); PetscFree(lens); 1698 PetscFree(owner); PetscFree(nprocs); 1699 1700 /* actually zap the local rows */ 1701 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 1702 PLogObjectParent(A,istmp); 1703 1704 ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr); 1705 ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 1706 ierr = ISDestroy(istmp); CHKERRQ(ierr); 1707 1708 if (diag) { 1709 for ( i = 0; i < slen; i++ ) { 1710 row = lrows[i] + rstart_bs; 1711 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES); CHKERRQ(ierr); 1712 } 1713 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1714 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1715 } 1716 PetscFree(lrows); 1717 1718 /* wait on sends */ 1719 if (nsends) { 1720 send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 1721 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1722 PetscFree(send_status); 1723 } 1724 PetscFree(send_waits); PetscFree(svalues); 1725 1726 PetscFunctionReturn(0); 1727 } 1728 extern int MatPrintHelp_SeqBAIJ(Mat); 1729 #undef __FUNC__ 1730 #define __FUNC__ "MatPrintHelp_MPIBAIJ" 1731 int MatPrintHelp_MPIBAIJ(Mat A) 1732 { 1733 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 1734 MPI_Comm comm = A->comm; 1735 static int called = 0; 1736 int ierr; 1737 1738 PetscFunctionBegin; 1739 if (!a->rank) { 1740 ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr); 1741 } 1742 if (called) {PetscFunctionReturn(0);} else called = 1; 1743 (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n"); 1744 (*PetscHelpPrintf)(comm," -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n"); 1745 PetscFunctionReturn(0); 1746 } 1747 1748 #undef __FUNC__ 1749 #define __FUNC__ "MatSetUnfactored_MPIBAIJ" 1750 int MatSetUnfactored_MPIBAIJ(Mat A) 1751 { 1752 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 1753 int ierr; 1754 1755 PetscFunctionBegin; 1756 ierr = MatSetUnfactored(a->A); CHKERRQ(ierr); 1757 PetscFunctionReturn(0); 1758 } 1759 1760 static int MatConvertSameType_MPIBAIJ(Mat,Mat *,int); 1761 1762 /* -------------------------------------------------------------------*/ 1763 static struct _MatOps MatOps = { 1764 MatSetValues_MPIBAIJ,MatGetRow_MPIBAIJ,MatRestoreRow_MPIBAIJ,MatMult_MPIBAIJ, 1765 MatMultAdd_MPIBAIJ,MatMultTrans_MPIBAIJ,MatMultTransAdd_MPIBAIJ,0, 1766 0,0,0,0, 1767 0,0,MatTranspose_MPIBAIJ,MatGetInfo_MPIBAIJ, 1768 0,MatGetDiagonal_MPIBAIJ,MatDiagonalScale_MPIBAIJ,MatNorm_MPIBAIJ, 1769 MatAssemblyBegin_MPIBAIJ,MatAssemblyEnd_MPIBAIJ,0,MatSetOption_MPIBAIJ, 1770 MatZeroEntries_MPIBAIJ,MatZeroRows_MPIBAIJ,0, 1771 0,0,0,MatGetSize_MPIBAIJ, 1772 MatGetLocalSize_MPIBAIJ,MatGetOwnershipRange_MPIBAIJ,0,0, 1773 0,0,MatConvertSameType_MPIBAIJ,0,0, 1774 0,0,0,MatGetSubMatrices_MPIBAIJ, 1775 MatIncreaseOverlap_MPIBAIJ,MatGetValues_MPIBAIJ,0,MatPrintHelp_MPIBAIJ, 1776 MatScale_MPIBAIJ,0,0,0,MatGetBlockSize_MPIBAIJ, 1777 0,0,0,0,0,0,MatSetUnfactored_MPIBAIJ,0,MatSetValuesBlocked_MPIBAIJ}; 1778 1779 1780 #undef __FUNC__ 1781 #define __FUNC__ "MatCreateMPIBAIJ" 1782 /*@C 1783 MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 1784 (block compressed row). For good matrix assembly performance 1785 the user should preallocate the matrix storage by setting the parameters 1786 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1787 performance can be increased by more than a factor of 50. 1788 1789 Input Parameters: 1790 . comm - MPI communicator 1791 . bs - size of blockk 1792 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1793 This value should be the same as the local size used in creating the 1794 y vector for the matrix-vector product y = Ax. 1795 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1796 This value should be the same as the local size used in creating the 1797 x vector for the matrix-vector product y = Ax. 1798 . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 1799 . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 1800 . d_nz - number of block nonzeros per block row in diagonal portion of local 1801 submatrix (same for all local rows) 1802 . d_nzz - array containing the number of block nonzeros in the various block rows 1803 of the in diagonal portion of the local (possibly different for each block 1804 row) or PETSC_NULL. You must leave room for the diagonal entry even if 1805 it is zero. 1806 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1807 submatrix (same for all local rows). 1808 . o_nzz - array containing the number of nonzeros in the various block rows of the 1809 off-diagonal portion of the local submatrix (possibly different for 1810 each block row) or PETSC_NULL. 1811 1812 Output Parameter: 1813 . A - the matrix 1814 1815 Notes: 1816 The user MUST specify either the local or global matrix dimensions 1817 (possibly both). 1818 1819 Storage Information: 1820 For a square global matrix we define each processor's diagonal portion 1821 to be its local rows and the corresponding columns (a square submatrix); 1822 each processor's off-diagonal portion encompasses the remainder of the 1823 local matrix (a rectangular submatrix). 1824 1825 The user can specify preallocated storage for the diagonal part of 1826 the local submatrix with either d_nz or d_nnz (not both). Set 1827 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1828 memory allocation. Likewise, specify preallocated storage for the 1829 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1830 1831 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1832 the figure below we depict these three local rows and all columns (0-11). 1833 1834 $ 0 1 2 3 4 5 6 7 8 9 10 11 1835 $ ------------------- 1836 $ row 3 | o o o d d d o o o o o o 1837 $ row 4 | o o o d d d o o o o o o 1838 $ row 5 | o o o d d d o o o o o o 1839 $ ------------------- 1840 $ 1841 1842 Thus, any entries in the d locations are stored in the d (diagonal) 1843 submatrix, and any entries in the o locations are stored in the 1844 o (off-diagonal) submatrix. Note that the d and the o submatrices are 1845 stored simply in the MATSEQBAIJ format for compressed row storage. 1846 1847 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 1848 and o_nz should indicate the number of block nonzeros per row in the o matrix. 1849 In general, for PDE problems in which most nonzeros are near the diagonal, 1850 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1851 or you will get TERRIBLE performance; see the users' manual chapter on 1852 matrices. 1853 1854 .keywords: matrix, block, aij, compressed row, sparse, parallel 1855 1856 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues() 1857 @*/ 1858 int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N, 1859 int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 1860 { 1861 Mat B; 1862 Mat_MPIBAIJ *b; 1863 int ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size,flg; 1864 1865 PetscFunctionBegin; 1866 if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid block size specified, must be positive"); 1867 1868 MPI_Comm_size(comm,&size); 1869 if (size == 1) { 1870 if (M == PETSC_DECIDE) M = m; 1871 if (N == PETSC_DECIDE) N = n; 1872 ierr = MatCreateSeqBAIJ(comm,bs,M,N,d_nz,d_nnz,A); CHKERRQ(ierr); 1873 PetscFunctionReturn(0); 1874 } 1875 1876 *A = 0; 1877 PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,comm,MatDestroy,MatView); 1878 PLogObjectCreate(B); 1879 B->data = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b); 1880 PetscMemzero(b,sizeof(Mat_MPIBAIJ)); 1881 PetscMemcpy(B->ops,&MatOps,sizeof(struct _MatOps)); 1882 1883 B->destroy = MatDestroy_MPIBAIJ; 1884 B->view = MatView_MPIBAIJ; 1885 B->mapping = 0; 1886 B->factor = 0; 1887 B->assembled = PETSC_FALSE; 1888 1889 B->insertmode = NOT_SET_VALUES; 1890 MPI_Comm_rank(comm,&b->rank); 1891 MPI_Comm_size(comm,&b->size); 1892 1893 if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) { 1894 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 1895 } 1896 if ( M == PETSC_DECIDE && m == PETSC_DECIDE) { 1897 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either M or m should be specified"); 1898 } 1899 if ( N == PETSC_DECIDE && n == PETSC_DECIDE) { 1900 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either N or n should be specified"); 1901 } 1902 if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE; 1903 if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE; 1904 1905 if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 1906 work[0] = m; work[1] = n; 1907 mbs = m/bs; nbs = n/bs; 1908 ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr); 1909 if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;} 1910 if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;} 1911 } 1912 if (m == PETSC_DECIDE) { 1913 Mbs = M/bs; 1914 if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global rows must be divisible by blocksize"); 1915 mbs = Mbs/b->size + ((Mbs % b->size) > b->rank); 1916 m = mbs*bs; 1917 } 1918 if (n == PETSC_DECIDE) { 1919 Nbs = N/bs; 1920 if (Nbs*bs != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global cols must be divisible by blocksize"); 1921 nbs = Nbs/b->size + ((Nbs % b->size) > b->rank); 1922 n = nbs*bs; 1923 } 1924 if (mbs*bs != m || nbs*bs != n) { 1925 SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of local rows, cols must be divisible by blocksize"); 1926 } 1927 1928 b->m = m; B->m = m; 1929 b->n = n; B->n = n; 1930 b->N = N; B->N = N; 1931 b->M = M; B->M = M; 1932 b->bs = bs; 1933 b->bs2 = bs*bs; 1934 b->mbs = mbs; 1935 b->nbs = nbs; 1936 b->Mbs = Mbs; 1937 b->Nbs = Nbs; 1938 1939 /* build local table of row and column ownerships */ 1940 b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners); 1941 PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 1942 b->cowners = b->rowners + b->size + 2; 1943 ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1944 b->rowners[0] = 0; 1945 for ( i=2; i<=b->size; i++ ) { 1946 b->rowners[i] += b->rowners[i-1]; 1947 } 1948 b->rstart = b->rowners[b->rank]; 1949 b->rend = b->rowners[b->rank+1]; 1950 b->rstart_bs = b->rstart * bs; 1951 b->rend_bs = b->rend * bs; 1952 1953 ierr = MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1954 b->cowners[0] = 0; 1955 for ( i=2; i<=b->size; i++ ) { 1956 b->cowners[i] += b->cowners[i-1]; 1957 } 1958 b->cstart = b->cowners[b->rank]; 1959 b->cend = b->cowners[b->rank+1]; 1960 b->cstart_bs = b->cstart * bs; 1961 b->cend_bs = b->cend * bs; 1962 1963 1964 if (d_nz == PETSC_DEFAULT) d_nz = 5; 1965 ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr); 1966 PLogObjectParent(B,b->A); 1967 if (o_nz == PETSC_DEFAULT) o_nz = 0; 1968 ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr); 1969 PLogObjectParent(B,b->B); 1970 1971 /* build cache for off array entries formed */ 1972 ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr); 1973 b->donotstash = 0; 1974 b->colmap = 0; 1975 b->garray = 0; 1976 b->roworiented = 1; 1977 1978 /* stuff used in block assembly */ 1979 b->barray = 0; 1980 1981 /* stuff used for matrix vector multiply */ 1982 b->lvec = 0; 1983 b->Mvctx = 0; 1984 1985 /* stuff for MatGetRow() */ 1986 b->rowindices = 0; 1987 b->rowvalues = 0; 1988 b->getrowactive = PETSC_FALSE; 1989 1990 /* hash table stuff */ 1991 b->ht = 0; 1992 b->hd = 0; 1993 b->ht_size = 0; 1994 b->ht_flag = 0; 1995 b->ht_fact = 0; 1996 b->ht_total_ct = 0; 1997 b->ht_insert_ct = 0; 1998 1999 *A = B; 2000 ierr = OptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg); CHKERRQ(ierr); 2001 if (flg) { 2002 double fact = 1.39; 2003 ierr = MatSetOption(B,MAT_USE_HASH_TABLE); CHKERRQ(ierr); 2004 ierr = OptionsGetDouble(PETSC_NULL,"-mat_use_hash_table",&fact,&flg); CHKERRQ(ierr); 2005 if (fact <= 1.0) fact = 1.39; 2006 ierr = MatMPIBAIJSetHashTableFactor(B,fact); CHKERRQ(ierr); 2007 PLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact); 2008 } 2009 PetscFunctionReturn(0); 2010 } 2011 2012 #undef __FUNC__ 2013 #define __FUNC__ "MatConvertSameType_MPIBAIJ" 2014 static int MatConvertSameType_MPIBAIJ(Mat matin,Mat *newmat,int cpvalues) 2015 { 2016 Mat mat; 2017 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data; 2018 int ierr, len=0, flg; 2019 2020 PetscFunctionBegin; 2021 *newmat = 0; 2022 PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,matin->comm,MatDestroy,MatView); 2023 PLogObjectCreate(mat); 2024 mat->data = (void *) (a = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(a); 2025 PetscMemcpy(mat->ops,&MatOps,sizeof(struct _MatOps)); 2026 mat->destroy = MatDestroy_MPIBAIJ; 2027 mat->view = MatView_MPIBAIJ; 2028 mat->factor = matin->factor; 2029 mat->assembled = PETSC_TRUE; 2030 2031 a->m = mat->m = oldmat->m; 2032 a->n = mat->n = oldmat->n; 2033 a->M = mat->M = oldmat->M; 2034 a->N = mat->N = oldmat->N; 2035 2036 a->bs = oldmat->bs; 2037 a->bs2 = oldmat->bs2; 2038 a->mbs = oldmat->mbs; 2039 a->nbs = oldmat->nbs; 2040 a->Mbs = oldmat->Mbs; 2041 a->Nbs = oldmat->Nbs; 2042 2043 a->rstart = oldmat->rstart; 2044 a->rend = oldmat->rend; 2045 a->cstart = oldmat->cstart; 2046 a->cend = oldmat->cend; 2047 a->size = oldmat->size; 2048 a->rank = oldmat->rank; 2049 mat->insertmode = NOT_SET_VALUES; 2050 a->rowvalues = 0; 2051 a->getrowactive = PETSC_FALSE; 2052 a->barray = 0; 2053 2054 /* hash table stuff */ 2055 a->ht = 0; 2056 a->hd = 0; 2057 a->ht_size = 0; 2058 a->ht_flag = oldmat->ht_flag; 2059 a->ht_fact = oldmat->ht_fact; 2060 a->ht_total_ct = 0; 2061 a->ht_insert_ct = 0; 2062 2063 2064 a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 2065 PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 2066 a->cowners = a->rowners + a->size + 2; 2067 PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int)); 2068 ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr); 2069 if (oldmat->colmap) { 2070 a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap); 2071 PLogObjectMemory(mat,(a->Nbs)*sizeof(int)); 2072 PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int)); 2073 } else a->colmap = 0; 2074 if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) { 2075 a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray); 2076 PLogObjectMemory(mat,len*sizeof(int)); 2077 PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); 2078 } else a->garray = 0; 2079 2080 ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 2081 PLogObjectParent(mat,a->lvec); 2082 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 2083 PLogObjectParent(mat,a->Mvctx); 2084 ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr); 2085 PLogObjectParent(mat,a->A); 2086 ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr); 2087 PLogObjectParent(mat,a->B); 2088 ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr); 2089 if (flg) { 2090 ierr = MatPrintHelp(mat); CHKERRQ(ierr); 2091 } 2092 *newmat = mat; 2093 PetscFunctionReturn(0); 2094 } 2095 2096 #include "sys.h" 2097 2098 #undef __FUNC__ 2099 #define __FUNC__ "MatLoad_MPIBAIJ" 2100 int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat) 2101 { 2102 Mat A; 2103 int i, nz, ierr, j,rstart, rend, fd; 2104 Scalar *vals,*buf; 2105 MPI_Comm comm = ((PetscObject)viewer)->comm; 2106 MPI_Status status; 2107 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 2108 int *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf; 2109 int flg,tag = ((PetscObject)viewer)->tag,bs=1,bs2,Mbs,mbs,extra_rows; 2110 int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2111 int dcount,kmax,k,nzcount,tmp; 2112 2113 PetscFunctionBegin; 2114 ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr); 2115 bs2 = bs*bs; 2116 2117 MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 2118 if (!rank) { 2119 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 2120 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr); 2121 if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object"); 2122 if (header[3] < 0) { 2123 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as MPIBAIJ"); 2124 } 2125 } 2126 2127 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 2128 M = header[1]; N = header[2]; 2129 2130 if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices"); 2131 2132 /* 2133 This code adds extra rows to make sure the number of rows is 2134 divisible by the blocksize 2135 */ 2136 Mbs = M/bs; 2137 extra_rows = bs - M + bs*(Mbs); 2138 if (extra_rows == bs) extra_rows = 0; 2139 else Mbs++; 2140 if (extra_rows &&!rank) { 2141 PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n"); 2142 } 2143 2144 /* determine ownership of all rows */ 2145 mbs = Mbs/size + ((Mbs % size) > rank); 2146 m = mbs * bs; 2147 rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int)); CHKPTRQ(rowners); 2148 browners = rowners + size + 1; 2149 ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2150 rowners[0] = 0; 2151 for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1]; 2152 for ( i=0; i<=size; i++ ) browners[i] = rowners[i]*bs; 2153 rstart = rowners[rank]; 2154 rend = rowners[rank+1]; 2155 2156 /* distribute row lengths to all processors */ 2157 locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) ); CHKPTRQ(locrowlens); 2158 if (!rank) { 2159 rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) ); CHKPTRQ(rowlengths); 2160 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr); 2161 for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 2162 sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 2163 for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i]; 2164 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2165 PetscFree(sndcounts); 2166 } else { 2167 ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm);CHKERRQ(ierr); 2168 } 2169 2170 if (!rank) { 2171 /* calculate the number of nonzeros on each processor */ 2172 procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 2173 PetscMemzero(procsnz,size*sizeof(int)); 2174 for ( i=0; i<size; i++ ) { 2175 for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) { 2176 procsnz[i] += rowlengths[j]; 2177 } 2178 } 2179 PetscFree(rowlengths); 2180 2181 /* determine max buffer needed and allocate it */ 2182 maxnz = 0; 2183 for ( i=0; i<size; i++ ) { 2184 maxnz = PetscMax(maxnz,procsnz[i]); 2185 } 2186 cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 2187 2188 /* read in my part of the matrix column indices */ 2189 nz = procsnz[0]; 2190 ibuf = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf); 2191 mycols = ibuf; 2192 if (size == 1) nz -= extra_rows; 2193 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr); 2194 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2195 2196 /* read in every ones (except the last) and ship off */ 2197 for ( i=1; i<size-1; i++ ) { 2198 nz = procsnz[i]; 2199 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr); 2200 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 2201 } 2202 /* read in the stuff for the last proc */ 2203 if ( size != 1 ) { 2204 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2205 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr); 2206 for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i; 2207 ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr); 2208 } 2209 PetscFree(cols); 2210 } else { 2211 /* determine buffer space needed for message */ 2212 nz = 0; 2213 for ( i=0; i<m; i++ ) { 2214 nz += locrowlens[i]; 2215 } 2216 ibuf = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf); 2217 mycols = ibuf; 2218 /* receive message of column indices*/ 2219 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2220 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2221 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 2222 } 2223 2224 /* loop over local rows, determining number of off diagonal entries */ 2225 dlens = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) ); CHKPTRQ(dlens); 2226 odlens = dlens + (rend-rstart); 2227 mask = (int *) PetscMalloc( 3*Mbs*sizeof(int) ); CHKPTRQ(mask); 2228 PetscMemzero(mask,3*Mbs*sizeof(int)); 2229 masked1 = mask + Mbs; 2230 masked2 = masked1 + Mbs; 2231 rowcount = 0; nzcount = 0; 2232 for ( i=0; i<mbs; i++ ) { 2233 dcount = 0; 2234 odcount = 0; 2235 for ( j=0; j<bs; j++ ) { 2236 kmax = locrowlens[rowcount]; 2237 for ( k=0; k<kmax; k++ ) { 2238 tmp = mycols[nzcount++]/bs; 2239 if (!mask[tmp]) { 2240 mask[tmp] = 1; 2241 if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp; 2242 else masked1[dcount++] = tmp; 2243 } 2244 } 2245 rowcount++; 2246 } 2247 2248 dlens[i] = dcount; 2249 odlens[i] = odcount; 2250 2251 /* zero out the mask elements we set */ 2252 for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0; 2253 for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0; 2254 } 2255 2256 /* create our matrix */ 2257 ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat); 2258 CHKERRQ(ierr); 2259 A = *newmat; 2260 MatSetOption(A,MAT_COLUMNS_SORTED); 2261 2262 if (!rank) { 2263 buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(buf); 2264 /* read in my part of the matrix numerical values */ 2265 nz = procsnz[0]; 2266 vals = buf; 2267 mycols = ibuf; 2268 if (size == 1) nz -= extra_rows; 2269 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 2270 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2271 2272 /* insert into matrix */ 2273 jj = rstart*bs; 2274 for ( i=0; i<m; i++ ) { 2275 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2276 mycols += locrowlens[i]; 2277 vals += locrowlens[i]; 2278 jj++; 2279 } 2280 /* read in other processors (except the last one) and ship out */ 2281 for ( i=1; i<size-1; i++ ) { 2282 nz = procsnz[i]; 2283 vals = buf; 2284 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 2285 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2286 } 2287 /* the last proc */ 2288 if ( size != 1 ){ 2289 nz = procsnz[i] - extra_rows; 2290 vals = buf; 2291 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 2292 for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0; 2293 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 2294 } 2295 PetscFree(procsnz); 2296 } else { 2297 /* receive numeric values */ 2298 buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(buf); 2299 2300 /* receive message of values*/ 2301 vals = buf; 2302 mycols = ibuf; 2303 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2304 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2305 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 2306 2307 /* insert into matrix */ 2308 jj = rstart*bs; 2309 for ( i=0; i<m; i++ ) { 2310 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2311 mycols += locrowlens[i]; 2312 vals += locrowlens[i]; 2313 jj++; 2314 } 2315 } 2316 PetscFree(locrowlens); 2317 PetscFree(buf); 2318 PetscFree(ibuf); 2319 PetscFree(rowners); 2320 PetscFree(dlens); 2321 PetscFree(mask); 2322 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 2323 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 2324 PetscFunctionReturn(0); 2325 } 2326 2327 2328 2329 #undef __FUNC__ 2330 #define __FUNC__ "MatMPIBAIJSetHashTableFactor" 2331 /*@ 2332 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2333 2334 Input Parameters: 2335 . mat - the matrix 2336 . fact - factor 2337 2338 Notes: 2339 This can also be set by the command line option: -mat_use_hash_table fact 2340 2341 .keywords: matrix, hashtable, factor, HT 2342 2343 .seealso: MatSetOption() 2344 @*/ 2345 int MatMPIBAIJSetHashTableFactor(Mat mat,double fact) 2346 { 2347 Mat_MPIBAIJ *baij; 2348 2349 PetscFunctionBegin; 2350 PetscValidHeaderSpecific(mat,MAT_COOKIE); 2351 if (mat->type != MATMPIBAIJ) { 2352 SETERRQ(PETSC_ERR_ARG_WRONG,1,"Incorrect matrix type. Use MPIBAIJ only."); 2353 } 2354 baij = (Mat_MPIBAIJ*) mat->data; 2355 baij->ht_fact = fact; 2356 PetscFunctionReturn(0); 2357 } 2358