1 #ifdef PETSC_RCS_HEADER 2 static char vcid[] = "$Id: mpibaij.c,v 1.119 1998/04/27 03:52:09 curfman Exp bsmith $"; 3 #endif 4 5 #include "pinclude/pviewer.h" /*I "mat.h" I*/ 6 #include "src/mat/impls/baij/mpi/mpibaij.h" 7 #include "src/vec/vecimpl.h" 8 9 10 extern int MatSetUpMultiply_MPIBAIJ(Mat); 11 extern int DisAssemble_MPIBAIJ(Mat); 12 extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int); 13 extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatGetSubMatrixCall,Mat **); 14 15 /* 16 Local utility routine that creates a mapping from the global column 17 number to the local number in the off-diagonal part of the local 18 storage of the matrix. This is done in a non scable way since the 19 length of colmap equals the global matrix length. 20 */ 21 #undef __FUNC__ 22 #define __FUNC__ "CreateColmap_MPIBAIJ_Private" 23 static int CreateColmap_MPIBAIJ_Private(Mat mat) 24 { 25 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 26 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data; 27 int nbs = B->nbs,i,bs=B->bs;; 28 29 PetscFunctionBegin; 30 baij->colmap = (int *) PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap); 31 PLogObjectMemory(mat,baij->Nbs*sizeof(int)); 32 PetscMemzero(baij->colmap,baij->Nbs*sizeof(int)); 33 for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1; 34 PetscFunctionReturn(0); 35 } 36 37 #define CHUNKSIZE 10 38 39 #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 40 { \ 41 \ 42 brow = row/bs; \ 43 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 44 rmax = aimax[brow]; nrow = ailen[brow]; \ 45 bcol = col/bs; \ 46 ridx = row % bs; cidx = col % bs; \ 47 low = 0; high = nrow; \ 48 while (high-low > 3) { \ 49 t = (low+high)/2; \ 50 if (rp[t] > bcol) high = t; \ 51 else low = t; \ 52 } \ 53 for ( _i=low; _i<high; _i++ ) { \ 54 if (rp[_i] > bcol) break; \ 55 if (rp[_i] == bcol) { \ 56 bap = ap + bs2*_i + bs*cidx + ridx; \ 57 if (addv == ADD_VALUES) *bap += value; \ 58 else *bap = value; \ 59 goto a_noinsert; \ 60 } \ 61 } \ 62 if (a->nonew == 1) goto a_noinsert; \ 63 else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 64 if (nrow >= rmax) { \ 65 /* there is no extra room in row, therefore enlarge */ \ 66 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 67 Scalar *new_a; \ 68 \ 69 if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 70 \ 71 /* malloc new storage space */ \ 72 len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); \ 73 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 74 new_j = (int *) (new_a + bs2*new_nz); \ 75 new_i = new_j + new_nz; \ 76 \ 77 /* copy over old data into new slots */ \ 78 for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} \ 79 for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 80 PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); \ 81 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \ 82 PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, \ 83 len*sizeof(int)); \ 84 PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); \ 85 PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \ 86 PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \ 87 aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar)); \ 88 /* free up old matrix storage */ \ 89 PetscFree(a->a); \ 90 if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \ 91 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 92 a->singlemalloc = 1; \ 93 \ 94 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 95 rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \ 96 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \ 97 a->maxnz += bs2*CHUNKSIZE; \ 98 a->reallocs++; \ 99 a->nz++; \ 100 } \ 101 N = nrow++ - 1; \ 102 /* shift up all the later entries in this row */ \ 103 for ( ii=N; ii>=_i; ii-- ) { \ 104 rp[ii+1] = rp[ii]; \ 105 PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \ 106 } \ 107 if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar)); \ 108 rp[_i] = bcol; \ 109 ap[bs2*_i + bs*cidx + ridx] = value; \ 110 a_noinsert:; \ 111 ailen[brow] = nrow; \ 112 } 113 114 #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 115 { \ 116 \ 117 brow = row/bs; \ 118 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 119 rmax = bimax[brow]; nrow = bilen[brow]; \ 120 bcol = col/bs; \ 121 ridx = row % bs; cidx = col % bs; \ 122 low = 0; high = nrow; \ 123 while (high-low > 3) { \ 124 t = (low+high)/2; \ 125 if (rp[t] > bcol) high = t; \ 126 else low = t; \ 127 } \ 128 for ( _i=low; _i<high; _i++ ) { \ 129 if (rp[_i] > bcol) break; \ 130 if (rp[_i] == bcol) { \ 131 bap = ap + bs2*_i + bs*cidx + ridx; \ 132 if (addv == ADD_VALUES) *bap += value; \ 133 else *bap = value; \ 134 goto b_noinsert; \ 135 } \ 136 } \ 137 if (b->nonew == 1) goto b_noinsert; \ 138 else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 139 if (nrow >= rmax) { \ 140 /* there is no extra room in row, therefore enlarge */ \ 141 int new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 142 Scalar *new_a; \ 143 \ 144 if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 145 \ 146 /* malloc new storage space */ \ 147 len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(b->mbs+1)*sizeof(int); \ 148 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 149 new_j = (int *) (new_a + bs2*new_nz); \ 150 new_i = new_j + new_nz; \ 151 \ 152 /* copy over old data into new slots */ \ 153 for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = bi[ii];} \ 154 for ( ii=brow+1; ii<b->mbs+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 155 PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int)); \ 156 len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \ 157 PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow, \ 158 len*sizeof(int)); \ 159 PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(Scalar)); \ 160 PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \ 161 PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \ 162 ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(Scalar)); \ 163 /* free up old matrix storage */ \ 164 PetscFree(b->a); \ 165 if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \ 166 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 167 b->singlemalloc = 1; \ 168 \ 169 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 170 rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \ 171 PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \ 172 b->maxnz += bs2*CHUNKSIZE; \ 173 b->reallocs++; \ 174 b->nz++; \ 175 } \ 176 N = nrow++ - 1; \ 177 /* shift up all the later entries in this row */ \ 178 for ( ii=N; ii>=_i; ii-- ) { \ 179 rp[ii+1] = rp[ii]; \ 180 PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \ 181 } \ 182 if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar)); \ 183 rp[_i] = bcol; \ 184 ap[bs2*_i + bs*cidx + ridx] = value; \ 185 b_noinsert:; \ 186 bilen[brow] = nrow; \ 187 } 188 189 #undef __FUNC__ 190 #define __FUNC__ "MatSetValues_MPIBAIJ" 191 int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 192 { 193 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 194 Scalar value; 195 int ierr,i,j,row,col; 196 int roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ; 197 int rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs; 198 int cend_orig=baij->cend_bs,bs=baij->bs; 199 200 /* Some Variables required in the macro */ 201 Mat A = baij->A; 202 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) (A)->data; 203 int *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 204 Scalar *aa=a->a; 205 206 Mat B = baij->B; 207 Mat_SeqBAIJ *b = (Mat_SeqBAIJ *) (B)->data; 208 int *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 209 Scalar *ba=b->a; 210 211 int *rp,ii,nrow,_i,rmax,N,brow,bcol; 212 int low,high,t,ridx,cidx,bs2=a->bs2; 213 Scalar *ap,*bap; 214 215 PetscFunctionBegin; 216 for ( i=0; i<m; i++ ) { 217 #if defined(USE_PETSC_BOPT_g) 218 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 219 if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 220 #endif 221 if (im[i] >= rstart_orig && im[i] < rend_orig) { 222 row = im[i] - rstart_orig; 223 for ( j=0; j<n; j++ ) { 224 if (in[j] >= cstart_orig && in[j] < cend_orig){ 225 col = in[j] - cstart_orig; 226 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 227 MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 228 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 229 } 230 #if defined(USE_PETSC_BOPT_g) 231 else if (in[j] < 0) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");} 232 else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Col too large");} 233 #endif 234 else { 235 if (mat->was_assembled) { 236 if (!baij->colmap) { 237 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 238 } 239 col = baij->colmap[in[j]/bs] - 1 + in[j]%bs; 240 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 241 ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 242 col = in[j]; 243 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 244 B = baij->B; 245 b = (Mat_SeqBAIJ *) (B)->data; 246 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 247 ba=b->a; 248 } 249 } else col = in[j]; 250 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 251 MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 252 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 253 } 254 } 255 } else { 256 if (roworiented && !baij->donotstash) { 257 ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 258 } else { 259 if (!baij->donotstash) { 260 row = im[i]; 261 for ( j=0; j<n; j++ ) { 262 ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 263 } 264 } 265 } 266 } 267 } 268 PetscFunctionReturn(0); 269 } 270 271 extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 272 #undef __FUNC__ 273 #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ" 274 int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 275 { 276 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 277 Scalar *value,*barray=baij->barray; 278 int ierr,i,j,ii,jj,row,col,k,l; 279 int roworiented = baij->roworiented,rstart=baij->rstart ; 280 int rend=baij->rend,cstart=baij->cstart,stepval; 281 int cend=baij->cend,bs=baij->bs,bs2=baij->bs2; 282 283 if(!barray) { 284 baij->barray = barray = (Scalar*) PetscMalloc(bs2*sizeof(Scalar)); CHKPTRQ(barray); 285 } 286 287 if (roworiented) { 288 stepval = (n-1)*bs; 289 } else { 290 stepval = (m-1)*bs; 291 } 292 for ( i=0; i<m; i++ ) { 293 #if defined(USE_PETSC_BOPT_g) 294 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 295 if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 296 #endif 297 if (im[i] >= rstart && im[i] < rend) { 298 row = im[i] - rstart; 299 for ( j=0; j<n; j++ ) { 300 /* If NumCol = 1 then a copy is not required */ 301 if ((roworiented) && (n == 1)) { 302 barray = v + i*bs2; 303 } else if((!roworiented) && (m == 1)) { 304 barray = v + j*bs2; 305 } else { /* Here a copy is required */ 306 if (roworiented) { 307 value = v + i*(stepval+bs)*bs + j*bs; 308 } else { 309 value = v + j*(stepval+bs)*bs + i*bs; 310 } 311 for ( ii=0; ii<bs; ii++,value+=stepval ) { 312 for (jj=0; jj<bs; jj++ ) { 313 *barray++ = *value++; 314 } 315 } 316 barray -=bs2; 317 } 318 319 if (in[j] >= cstart && in[j] < cend){ 320 col = in[j] - cstart; 321 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 322 } 323 #if defined(USE_PETSC_BOPT_g) 324 else if (in[j] < 0) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");} 325 else if (in[j] >= baij->Nbs) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");} 326 #endif 327 else { 328 if (mat->was_assembled) { 329 if (!baij->colmap) { 330 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 331 } 332 333 #if defined(USE_PETSC_BOPT_g) 334 if ((baij->colmap[in[j]] - 1) % bs) {SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");} 335 #endif 336 col = (baij->colmap[in[j]] - 1)/bs; 337 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 338 ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 339 col = in[j]; 340 } 341 } 342 else col = in[j]; 343 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 344 } 345 } 346 } else { 347 if (!baij->donotstash) { 348 if (roworiented ) { 349 row = im[i]*bs; 350 value = v + i*(stepval+bs)*bs; 351 for ( j=0; j<bs; j++,row++ ) { 352 for ( k=0; k<n; k++ ) { 353 for ( col=in[k]*bs,l=0; l<bs; l++,col++) { 354 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr); 355 } 356 } 357 } 358 } else { 359 for ( j=0; j<n; j++ ) { 360 value = v + j*(stepval+bs)*bs + i*bs; 361 col = in[j]*bs; 362 for ( k=0; k<bs; k++,col++,value+=stepval) { 363 for ( row = im[i]*bs,l=0; l<bs; l++,row++) { 364 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr); 365 } 366 } 367 } 368 } 369 } 370 } 371 } 372 PetscFunctionReturn(0); 373 } 374 #include <math.h> 375 #define HASH_KEY 0.6180339887 376 /* #define HASH1(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 377 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp))) 378 /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 379 #undef __FUNC__ 380 #define __FUNC__ "MatSetValues_MPIBAIJ_HT" 381 int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 382 { 383 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 384 int ierr,i,j,row,col; 385 int roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ; 386 int rend_orig=baij->rend_bs,Nbs=baij->Nbs; 387 int h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx; 388 double tmp; 389 Scalar ** HD = baij->hd,value; 390 #if defined(USE_PETSC_BOPT_g) 391 int total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 392 #endif 393 394 PetscFunctionBegin; 395 396 for ( i=0; i<m; i++ ) { 397 #if defined(USE_PETSC_BOPT_g) 398 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 399 if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 400 #endif 401 row = im[i]; 402 if (row >= rstart_orig && row < rend_orig) { 403 for ( j=0; j<n; j++ ) { 404 col = in[j]; 405 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 406 /* Look up into the Hash Table */ 407 key = (row/bs)*Nbs+(col/bs)+1; 408 h1 = HASH(size,key,tmp); 409 410 411 idx = h1; 412 #if defined(USE_PETSC_BOPT_g) 413 insert_ct++; 414 total_ct++; 415 if (HT[idx] != key) { 416 for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 417 if (idx == size) { 418 for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 419 if (idx == h1) { 420 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 421 } 422 } 423 } 424 #else 425 if (HT[idx] != key) { 426 for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++); 427 if (idx == size) { 428 for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++); 429 if (idx == h1) { 430 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 431 } 432 } 433 } 434 #endif 435 /* A HASH table entry is found, so insert the values at the correct address */ 436 if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 437 else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 438 } 439 } else { 440 if (roworiented && !baij->donotstash) { 441 ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 442 } else { 443 if (!baij->donotstash) { 444 row = im[i]; 445 for ( j=0; j<n; j++ ) { 446 ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 447 } 448 } 449 } 450 } 451 } 452 #if defined(USE_PETSC_BOPT_g) 453 baij->ht_total_ct = total_ct; 454 baij->ht_insert_ct = insert_ct; 455 #endif 456 PetscFunctionReturn(0); 457 } 458 459 #undef __FUNC__ 460 #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ_HT" 461 int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 462 { 463 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 464 int ierr,i,j,ii,jj,row,col,k,l; 465 int roworiented = baij->roworiented,rstart=baij->rstart ; 466 int rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2; 467 int h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 468 double tmp; 469 Scalar ** HD = baij->hd,*value,*v_t,*baij_a; 470 #if defined(USE_PETSC_BOPT_g) 471 int total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 472 #endif 473 474 PetscFunctionBegin; 475 476 if (roworiented) { 477 stepval = (n-1)*bs; 478 } else { 479 stepval = (m-1)*bs; 480 } 481 for ( i=0; i<m; i++ ) { 482 #if defined(USE_PETSC_BOPT_g) 483 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 484 if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 485 #endif 486 row = im[i]; 487 v_t = v + i*bs2; 488 if (row >= rstart && row < rend) { 489 for ( j=0; j<n; j++ ) { 490 col = in[j]; 491 492 /* Look up into the Hash Table */ 493 key = row*Nbs+col+1; 494 h1 = HASH(size,key,tmp); 495 496 idx = h1; 497 #if defined(USE_PETSC_BOPT_g) 498 total_ct++; 499 insert_ct++; 500 if (HT[idx] != key) { 501 for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 502 if (idx == size) { 503 for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 504 if (idx == h1) { 505 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 506 } 507 } 508 } 509 #else 510 if (HT[idx] != key) { 511 for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++); 512 if (idx == size) { 513 for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++); 514 if (idx == h1) { 515 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 516 } 517 } 518 } 519 #endif 520 baij_a = HD[idx]; 521 if (roworiented) { 522 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 523 /* value = v + (i*(stepval+bs)+j)*bs; */ 524 value = v_t; 525 v_t += bs; 526 if (addv == ADD_VALUES) { 527 for ( ii=0; ii<bs; ii++,value+=stepval) { 528 for ( jj=ii; jj<bs2; jj+=bs ) { 529 baij_a[jj] += *value++; 530 } 531 } 532 } else { 533 for ( ii=0; ii<bs; ii++,value+=stepval) { 534 for ( jj=ii; jj<bs2; jj+=bs ) { 535 baij_a[jj] = *value++; 536 } 537 } 538 } 539 } else { 540 value = v + j*(stepval+bs)*bs + i*bs; 541 if (addv == ADD_VALUES) { 542 for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) { 543 for ( jj=0; jj<bs; jj++ ) { 544 baij_a[jj] += *value++; 545 } 546 } 547 } else { 548 for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) { 549 for ( jj=0; jj<bs; jj++ ) { 550 baij_a[jj] = *value++; 551 } 552 } 553 } 554 } 555 } 556 } else { 557 if (!baij->donotstash) { 558 if (roworiented ) { 559 row = im[i]*bs; 560 value = v + i*(stepval+bs)*bs; 561 for ( j=0; j<bs; j++,row++ ) { 562 for ( k=0; k<n; k++ ) { 563 for ( col=in[k]*bs,l=0; l<bs; l++,col++) { 564 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr); 565 } 566 } 567 } 568 } else { 569 for ( j=0; j<n; j++ ) { 570 value = v + j*(stepval+bs)*bs + i*bs; 571 col = in[j]*bs; 572 for ( k=0; k<bs; k++,col++,value+=stepval) { 573 for ( row = im[i]*bs,l=0; l<bs; l++,row++) { 574 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr); 575 } 576 } 577 } 578 } 579 } 580 } 581 } 582 #if defined(USE_PETSC_BOPT_g) 583 baij->ht_total_ct = total_ct; 584 baij->ht_insert_ct = insert_ct; 585 #endif 586 PetscFunctionReturn(0); 587 } 588 589 #undef __FUNC__ 590 #define __FUNC__ "MatGetValues_MPIBAIJ" 591 int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 592 { 593 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 594 int bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs; 595 int bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col; 596 597 PetscFunctionBegin; 598 for ( i=0; i<m; i++ ) { 599 if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 600 if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 601 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 602 row = idxm[i] - bsrstart; 603 for ( j=0; j<n; j++ ) { 604 if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column"); 605 if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large"); 606 if (idxn[j] >= bscstart && idxn[j] < bscend){ 607 col = idxn[j] - bscstart; 608 ierr = MatGetValues(baij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 609 } else { 610 if (!baij->colmap) { 611 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 612 } 613 if((baij->colmap[idxn[j]/bs]-1 < 0) || 614 (baij->garray[(baij->colmap[idxn[j]/bs]-1)/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 615 else { 616 col = (baij->colmap[idxn[j]/bs]-1) + idxn[j]%bs; 617 ierr = MatGetValues(baij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 618 } 619 } 620 } 621 } else { 622 SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported"); 623 } 624 } 625 PetscFunctionReturn(0); 626 } 627 628 #undef __FUNC__ 629 #define __FUNC__ "MatNorm_MPIBAIJ" 630 int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm) 631 { 632 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 633 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data; 634 int ierr, i,bs2=baij->bs2; 635 double sum = 0.0; 636 Scalar *v; 637 638 PetscFunctionBegin; 639 if (baij->size == 1) { 640 ierr = MatNorm(baij->A,type,norm); CHKERRQ(ierr); 641 } else { 642 if (type == NORM_FROBENIUS) { 643 v = amat->a; 644 for (i=0; i<amat->nz*bs2; i++ ) { 645 #if defined(USE_PETSC_COMPLEX) 646 sum += real(conj(*v)*(*v)); v++; 647 #else 648 sum += (*v)*(*v); v++; 649 #endif 650 } 651 v = bmat->a; 652 for (i=0; i<bmat->nz*bs2; i++ ) { 653 #if defined(USE_PETSC_COMPLEX) 654 sum += real(conj(*v)*(*v)); v++; 655 #else 656 sum += (*v)*(*v); v++; 657 #endif 658 } 659 ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 660 *norm = sqrt(*norm); 661 } else { 662 SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet"); 663 } 664 } 665 PetscFunctionReturn(0); 666 } 667 668 #undef __FUNC__ 669 #define __FUNC__ "MatAssemblyBegin_MPIBAIJ" 670 int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 671 { 672 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 673 MPI_Comm comm = mat->comm; 674 int size = baij->size, *owners = baij->rowners,bs=baij->bs; 675 int rank = baij->rank,tag = mat->tag, *owner,*starts,count,ierr; 676 MPI_Request *send_waits,*recv_waits; 677 int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 678 InsertMode addv; 679 Scalar *rvalues,*svalues; 680 681 PetscFunctionBegin; 682 /* make sure all processors are either in INSERTMODE or ADDMODE */ 683 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr); 684 if (addv == (ADD_VALUES|INSERT_VALUES)) { 685 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added"); 686 } 687 mat->insertmode = addv; /* in case this processor had no cache */ 688 689 /* first count number of contributors to each processor */ 690 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 691 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 692 owner = (int *) PetscMalloc( (baij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 693 for ( i=0; i<baij->stash.n; i++ ) { 694 idx = baij->stash.idx[i]; 695 for ( j=0; j<size; j++ ) { 696 if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 697 nprocs[j]++; procs[j] = 1; owner[i] = j; break; 698 } 699 } 700 } 701 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 702 703 /* inform other processors of number of messages and max length*/ 704 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 705 ierr = MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 706 nreceives = work[rank]; 707 ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 708 nmax = work[rank]; 709 PetscFree(work); 710 711 /* post receives: 712 1) each message will consist of ordered pairs 713 (global index,value) we store the global index as a double 714 to simplify the message passing. 715 2) since we don't know how long each individual message is we 716 allocate the largest needed buffer for each receive. Potentially 717 this is a lot of wasted space. 718 719 720 This could be done better. 721 */ 722 rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));CHKPTRQ(rvalues); 723 recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 724 for ( i=0; i<nreceives; i++ ) { 725 ierr = MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 726 } 727 728 /* do sends: 729 1) starts[i] gives the starting index in svalues for stuff going to 730 the ith processor 731 */ 732 svalues = (Scalar *) PetscMalloc(3*(baij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 733 send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 734 starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 735 starts[0] = 0; 736 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 737 for ( i=0; i<baij->stash.n; i++ ) { 738 svalues[3*starts[owner[i]]] = (Scalar) baij->stash.idx[i]; 739 svalues[3*starts[owner[i]]+1] = (Scalar) baij->stash.idy[i]; 740 svalues[3*(starts[owner[i]]++)+2] = baij->stash.array[i]; 741 } 742 PetscFree(owner); 743 starts[0] = 0; 744 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 745 count = 0; 746 for ( i=0; i<size; i++ ) { 747 if (procs[i]) { 748 ierr = MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 749 } 750 } 751 PetscFree(starts); PetscFree(nprocs); 752 753 /* Free cache space */ 754 PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Number of off-processor values %d\n",baij->stash.n); 755 ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr); 756 757 baij->svalues = svalues; baij->rvalues = rvalues; 758 baij->nsends = nsends; baij->nrecvs = nreceives; 759 baij->send_waits = send_waits; baij->recv_waits = recv_waits; 760 baij->rmax = nmax; 761 762 PetscFunctionReturn(0); 763 } 764 765 /* 766 Creates the hash table, and sets the table 767 This table is created only once. 768 If new entried need to be added to the matrix 769 then the hash table has to be destroyed and 770 recreated. 771 */ 772 #undef __FUNC__ 773 #define __FUNC__ "MatCreateHashTable_MPIBAIJ_Private" 774 int MatCreateHashTable_MPIBAIJ_Private(Mat mat,double factor) 775 { 776 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 777 Mat A = baij->A, B=baij->B; 778 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data, *b=(Mat_SeqBAIJ *)B->data; 779 int i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 780 int size,bs2=baij->bs2,rstart=baij->rstart; 781 int cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs; 782 int *HT,key; 783 Scalar **HD; 784 double tmp; 785 #if defined(USE_PETSC_BOPT_g) 786 int ct=0,max=0; 787 #endif 788 789 PetscFunctionBegin; 790 baij->ht_size=(int)(factor*nz); 791 size = baij->ht_size; 792 793 if (baij->ht) { 794 PetscFunctionReturn(0); 795 } 796 797 /* Allocate Memory for Hash Table */ 798 baij->hd = (Scalar**)PetscMalloc((size)*(sizeof(int)+sizeof(Scalar*))+1); CHKPTRQ(baij->hd); 799 baij->ht = (int*)(baij->hd + size); 800 HD = baij->hd; 801 HT = baij->ht; 802 803 804 PetscMemzero(HD,size*(sizeof(int)+sizeof(Scalar*))); 805 806 807 /* Loop Over A */ 808 for ( i=0; i<a->mbs; i++ ) { 809 for ( j=ai[i]; j<ai[i+1]; j++ ) { 810 row = i+rstart; 811 col = aj[j]+cstart; 812 813 key = row*Nbs + col + 1; 814 h1 = HASH(size,key,tmp); 815 for ( k=0; k<size; k++ ){ 816 if (HT[(h1+k)%size] == 0.0) { 817 HT[(h1+k)%size] = key; 818 HD[(h1+k)%size] = a->a + j*bs2; 819 break; 820 #if defined(USE_PETSC_BOPT_g) 821 } else { 822 ct++; 823 #endif 824 } 825 } 826 #if defined(USE_PETSC_BOPT_g) 827 if (k> max) max = k; 828 #endif 829 } 830 } 831 /* Loop Over B */ 832 for ( i=0; i<b->mbs; i++ ) { 833 for ( j=bi[i]; j<bi[i+1]; j++ ) { 834 row = i+rstart; 835 col = garray[bj[j]]; 836 key = row*Nbs + col + 1; 837 h1 = HASH(size,key,tmp); 838 for ( k=0; k<size; k++ ){ 839 if (HT[(h1+k)%size] == 0.0) { 840 HT[(h1+k)%size] = key; 841 HD[(h1+k)%size] = b->a + j*bs2; 842 break; 843 #if defined(USE_PETSC_BOPT_g) 844 } else { 845 ct++; 846 #endif 847 } 848 } 849 #if defined(USE_PETSC_BOPT_g) 850 if (k> max) max = k; 851 #endif 852 } 853 } 854 855 /* Print Summary */ 856 #if defined(USE_PETSC_BOPT_g) 857 for ( i=0,j=0; i<size; i++) 858 if (HT[i]) {j++;} 859 PLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n", 860 (j== 0)? 0.0:((double)(ct+j))/j,max); 861 #endif 862 PetscFunctionReturn(0); 863 } 864 865 #undef __FUNC__ 866 #define __FUNC__ "MatAssemblyEnd_MPIBAIJ" 867 int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 868 { 869 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 870 MPI_Status *send_status,recv_status; 871 int imdex,nrecvs = baij->nrecvs, count = nrecvs, i, n, ierr; 872 int bs=baij->bs,row,col,other_disassembled; 873 Scalar *values,val; 874 InsertMode addv = mat->insertmode; 875 876 PetscFunctionBegin; 877 /* wait on receives */ 878 while (count) { 879 ierr = MPI_Waitany(nrecvs,baij->recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 880 /* unpack receives into our local space */ 881 values = baij->rvalues + 3*imdex*baij->rmax; 882 ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,&n);CHKERRQ(ierr); 883 n = n/3; 884 for ( i=0; i<n; i++ ) { 885 row = (int) PetscReal(values[3*i]) - baij->rstart*bs; 886 col = (int) PetscReal(values[3*i+1]); 887 val = values[3*i+2]; 888 if (col >= baij->cstart*bs && col < baij->cend*bs) { 889 col -= baij->cstart*bs; 890 ierr = MatSetValues(baij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr) 891 } else { 892 if (mat->was_assembled) { 893 if (!baij->colmap) { 894 ierr = CreateColmap_MPIBAIJ_Private(mat); CHKERRQ(ierr); 895 } 896 col = (baij->colmap[col/bs]) - 1 + col%bs; 897 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 898 ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 899 col = (int) PetscReal(values[3*i+1]); 900 } 901 } 902 ierr = MatSetValues(baij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr) 903 } 904 } 905 count--; 906 } 907 PetscFree(baij->recv_waits); PetscFree(baij->rvalues); 908 909 /* wait on sends */ 910 if (baij->nsends) { 911 send_status = (MPI_Status *) PetscMalloc(baij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 912 ierr = MPI_Waitall(baij->nsends,baij->send_waits,send_status);CHKERRQ(ierr); 913 PetscFree(send_status); 914 } 915 PetscFree(baij->send_waits); PetscFree(baij->svalues); 916 917 ierr = MatAssemblyBegin(baij->A,mode); CHKERRQ(ierr); 918 ierr = MatAssemblyEnd(baij->A,mode); CHKERRQ(ierr); 919 920 /* determine if any processor has disassembled, if so we must 921 also disassemble ourselfs, in order that we may reassemble. */ 922 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 923 if (mat->was_assembled && !other_disassembled) { 924 ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 925 } 926 927 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 928 ierr = MatSetUpMultiply_MPIBAIJ(mat); CHKERRQ(ierr); 929 } 930 ierr = MatAssemblyBegin(baij->B,mode); CHKERRQ(ierr); 931 ierr = MatAssemblyEnd(baij->B,mode); CHKERRQ(ierr); 932 933 #if defined(USE_PETSC_BOPT_g) 934 if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 935 PLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n", 936 ((double)baij->ht_total_ct)/baij->ht_insert_ct); 937 baij->ht_total_ct = 0; 938 baij->ht_insert_ct = 0; 939 } 940 #endif 941 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 942 ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact); CHKERRQ(ierr); 943 mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 944 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 945 } 946 947 if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;} 948 PetscFunctionReturn(0); 949 } 950 951 #undef __FUNC__ 952 #define __FUNC__ "MatView_MPIBAIJ_Binary" 953 static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer) 954 { 955 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 956 int ierr; 957 958 PetscFunctionBegin; 959 if (baij->size == 1) { 960 ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 961 } else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported"); 962 PetscFunctionReturn(0); 963 } 964 965 #undef __FUNC__ 966 #define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworMatlab" 967 static int MatView_MPIBAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer) 968 { 969 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 970 int ierr, format,rank,bs = baij->bs; 971 FILE *fd; 972 ViewerType vtype; 973 974 PetscFunctionBegin; 975 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 976 if (vtype == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) { 977 ierr = ViewerGetFormat(viewer,&format); 978 if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 979 MatInfo info; 980 MPI_Comm_rank(mat->comm,&rank); 981 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 982 ierr = MatGetInfo(mat,MAT_LOCAL,&info); 983 PetscSequentialPhaseBegin(mat->comm,1); 984 fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n", 985 rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs, 986 baij->bs,(int)info.memory); 987 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info); 988 fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs); 989 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info); 990 fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs); 991 fflush(fd); 992 PetscSequentialPhaseEnd(mat->comm,1); 993 ierr = VecScatterView(baij->Mvctx,viewer); CHKERRQ(ierr); 994 PetscFunctionReturn(0); 995 } else if (format == VIEWER_FORMAT_ASCII_INFO) { 996 PetscPrintf(mat->comm," block size is %d\n",bs); 997 PetscFunctionReturn(0); 998 } 999 } 1000 1001 if (vtype == DRAW_VIEWER) { 1002 Draw draw; 1003 PetscTruth isnull; 1004 ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr); 1005 ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1006 } 1007 1008 if (vtype == ASCII_FILE_VIEWER) { 1009 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 1010 PetscSequentialPhaseBegin(mat->comm,1); 1011 fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 1012 baij->rank,baij->m,baij->rstart*bs,baij->rend*bs,baij->n, 1013 baij->cstart*bs,baij->cend*bs); 1014 ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 1015 ierr = MatView(baij->B,viewer); CHKERRQ(ierr); 1016 fflush(fd); 1017 PetscSequentialPhaseEnd(mat->comm,1); 1018 } else { 1019 int size = baij->size; 1020 rank = baij->rank; 1021 if (size == 1) { 1022 ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 1023 } else { 1024 /* assemble the entire matrix onto first processor. */ 1025 Mat A; 1026 Mat_SeqBAIJ *Aloc; 1027 int M = baij->M, N = baij->N,*ai,*aj,col,i,j,k,*rvals; 1028 int mbs=baij->mbs; 1029 Scalar *a; 1030 1031 if (!rank) { 1032 ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 1033 } else { 1034 ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 1035 } 1036 PLogObjectParent(mat,A); 1037 1038 /* copy over the A part */ 1039 Aloc = (Mat_SeqBAIJ*) baij->A->data; 1040 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1041 rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals); 1042 1043 for ( i=0; i<mbs; i++ ) { 1044 rvals[0] = bs*(baij->rstart + i); 1045 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 1046 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1047 col = (baij->cstart+aj[j])*bs; 1048 for (k=0; k<bs; k++ ) { 1049 ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1050 col++; a += bs; 1051 } 1052 } 1053 } 1054 /* copy over the B part */ 1055 Aloc = (Mat_SeqBAIJ*) baij->B->data; 1056 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1057 for ( i=0; i<mbs; i++ ) { 1058 rvals[0] = bs*(baij->rstart + i); 1059 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 1060 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1061 col = baij->garray[aj[j]]*bs; 1062 for (k=0; k<bs; k++ ) { 1063 ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1064 col++; a += bs; 1065 } 1066 } 1067 } 1068 PetscFree(rvals); 1069 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1070 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1071 /* 1072 Everyone has to call to draw the matrix since the graphics waits are 1073 synchronized across all processors that share the Draw object 1074 */ 1075 if (!rank || vtype == DRAW_VIEWER) { 1076 ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 1077 } 1078 ierr = MatDestroy(A); CHKERRQ(ierr); 1079 } 1080 } 1081 PetscFunctionReturn(0); 1082 } 1083 1084 1085 1086 #undef __FUNC__ 1087 #define __FUNC__ "MatView_MPIBAIJ" 1088 int MatView_MPIBAIJ(Mat mat,Viewer viewer) 1089 { 1090 int ierr; 1091 ViewerType vtype; 1092 1093 PetscFunctionBegin; 1094 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 1095 if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER || 1096 vtype == DRAW_VIEWER || vtype == MATLAB_VIEWER) { 1097 ierr = MatView_MPIBAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 1098 } else if (vtype == BINARY_FILE_VIEWER) { 1099 ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1100 } else { 1101 SETERRQ(1,1,"Viewer type not supported by PETSc object"); 1102 } 1103 PetscFunctionReturn(0); 1104 } 1105 1106 #undef __FUNC__ 1107 #define __FUNC__ "MatDestroy_MPIBAIJ" 1108 int MatDestroy_MPIBAIJ(Mat mat) 1109 { 1110 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1111 int ierr; 1112 1113 PetscFunctionBegin; 1114 #if defined(USE_PETSC_LOG) 1115 PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",baij->M,baij->N); 1116 #endif 1117 1118 ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr); 1119 PetscFree(baij->rowners); 1120 ierr = MatDestroy(baij->A); CHKERRQ(ierr); 1121 ierr = MatDestroy(baij->B); CHKERRQ(ierr); 1122 if (baij->colmap) PetscFree(baij->colmap); 1123 if (baij->garray) PetscFree(baij->garray); 1124 if (baij->lvec) VecDestroy(baij->lvec); 1125 if (baij->Mvctx) VecScatterDestroy(baij->Mvctx); 1126 if (baij->rowvalues) PetscFree(baij->rowvalues); 1127 if (baij->barray) PetscFree(baij->barray); 1128 if (baij->hd) PetscFree(baij->hd); 1129 PetscFree(baij); 1130 PLogObjectDestroy(mat); 1131 PetscHeaderDestroy(mat); 1132 PetscFunctionReturn(0); 1133 } 1134 1135 #undef __FUNC__ 1136 #define __FUNC__ "MatMult_MPIBAIJ" 1137 int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1138 { 1139 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1140 int ierr, nt; 1141 1142 PetscFunctionBegin; 1143 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1144 if (nt != a->n) { 1145 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx"); 1146 } 1147 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1148 if (nt != a->m) { 1149 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible parition of A and yy"); 1150 } 1151 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1152 ierr = (*a->A->ops->mult)(a->A,xx,yy); CHKERRQ(ierr); 1153 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1154 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 1155 ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1156 PetscFunctionReturn(0); 1157 } 1158 1159 #undef __FUNC__ 1160 #define __FUNC__ "MatMultAdd_MPIBAIJ" 1161 int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1162 { 1163 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1164 int ierr; 1165 1166 PetscFunctionBegin; 1167 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1168 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 1169 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1170 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 1171 PetscFunctionReturn(0); 1172 } 1173 1174 #undef __FUNC__ 1175 #define __FUNC__ "MatMultTrans_MPIBAIJ" 1176 int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy) 1177 { 1178 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1179 int ierr; 1180 1181 PetscFunctionBegin; 1182 /* do nondiagonal part */ 1183 ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 1184 /* send it on its way */ 1185 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1186 /* do local part */ 1187 ierr = (*a->A->ops->multtrans)(a->A,xx,yy); CHKERRQ(ierr); 1188 /* receive remote parts: note this assumes the values are not actually */ 1189 /* inserted in yy until the next line, which is true for my implementation*/ 1190 /* but is not perhaps always true. */ 1191 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1192 PetscFunctionReturn(0); 1193 } 1194 1195 #undef __FUNC__ 1196 #define __FUNC__ "MatMultTransAdd_MPIBAIJ" 1197 int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1198 { 1199 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1200 int ierr; 1201 1202 PetscFunctionBegin; 1203 /* do nondiagonal part */ 1204 ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 1205 /* send it on its way */ 1206 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 1207 /* do local part */ 1208 ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 1209 /* receive remote parts: note this assumes the values are not actually */ 1210 /* inserted in yy until the next line, which is true for my implementation*/ 1211 /* but is not perhaps always true. */ 1212 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 1213 PetscFunctionReturn(0); 1214 } 1215 1216 /* 1217 This only works correctly for square matrices where the subblock A->A is the 1218 diagonal block 1219 */ 1220 #undef __FUNC__ 1221 #define __FUNC__ "MatGetDiagonal_MPIBAIJ" 1222 int MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1223 { 1224 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1225 int ierr; 1226 1227 PetscFunctionBegin; 1228 if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block"); 1229 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1230 PetscFunctionReturn(0); 1231 } 1232 1233 #undef __FUNC__ 1234 #define __FUNC__ "MatScale_MPIBAIJ" 1235 int MatScale_MPIBAIJ(Scalar *aa,Mat A) 1236 { 1237 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1238 int ierr; 1239 1240 PetscFunctionBegin; 1241 ierr = MatScale(aa,a->A); CHKERRQ(ierr); 1242 ierr = MatScale(aa,a->B); CHKERRQ(ierr); 1243 PetscFunctionReturn(0); 1244 } 1245 1246 #undef __FUNC__ 1247 #define __FUNC__ "MatGetSize_MPIBAIJ" 1248 int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n) 1249 { 1250 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1251 1252 PetscFunctionBegin; 1253 if (m) *m = mat->M; 1254 if (n) *n = mat->N; 1255 PetscFunctionReturn(0); 1256 } 1257 1258 #undef __FUNC__ 1259 #define __FUNC__ "MatGetLocalSize_MPIBAIJ" 1260 int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n) 1261 { 1262 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1263 1264 PetscFunctionBegin; 1265 *m = mat->m; *n = mat->n; 1266 PetscFunctionReturn(0); 1267 } 1268 1269 #undef __FUNC__ 1270 #define __FUNC__ "MatGetOwnershipRange_MPIBAIJ" 1271 int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n) 1272 { 1273 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1274 1275 PetscFunctionBegin; 1276 *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs; 1277 PetscFunctionReturn(0); 1278 } 1279 1280 extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 1281 extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 1282 1283 #undef __FUNC__ 1284 #define __FUNC__ "MatGetRow_MPIBAIJ" 1285 int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 1286 { 1287 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1288 Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 1289 int bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB; 1290 int nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs; 1291 int *cmap, *idx_p,cstart = mat->cstart; 1292 1293 PetscFunctionBegin; 1294 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active"); 1295 mat->getrowactive = PETSC_TRUE; 1296 1297 if (!mat->rowvalues && (idx || v)) { 1298 /* 1299 allocate enough space to hold information from the longest row. 1300 */ 1301 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data; 1302 int max = 1,mbs = mat->mbs,tmp; 1303 for ( i=0; i<mbs; i++ ) { 1304 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1305 if (max < tmp) { max = tmp; } 1306 } 1307 mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar))); 1308 CHKPTRQ(mat->rowvalues); 1309 mat->rowindices = (int *) (mat->rowvalues + max*bs2); 1310 } 1311 1312 if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,0,"Only local rows") 1313 lrow = row - brstart; 1314 1315 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1316 if (!v) {pvA = 0; pvB = 0;} 1317 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1318 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1319 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1320 nztot = nzA + nzB; 1321 1322 cmap = mat->garray; 1323 if (v || idx) { 1324 if (nztot) { 1325 /* Sort by increasing column numbers, assuming A and B already sorted */ 1326 int imark = -1; 1327 if (v) { 1328 *v = v_p = mat->rowvalues; 1329 for ( i=0; i<nzB; i++ ) { 1330 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1331 else break; 1332 } 1333 imark = i; 1334 for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i]; 1335 for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = vworkB[i]; 1336 } 1337 if (idx) { 1338 *idx = idx_p = mat->rowindices; 1339 if (imark > -1) { 1340 for ( i=0; i<imark; i++ ) { 1341 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1342 } 1343 } else { 1344 for ( i=0; i<nzB; i++ ) { 1345 if (cmap[cworkB[i]/bs] < cstart) 1346 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1347 else break; 1348 } 1349 imark = i; 1350 } 1351 for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart*bs + cworkA[i]; 1352 for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1353 } 1354 } else { 1355 if (idx) *idx = 0; 1356 if (v) *v = 0; 1357 } 1358 } 1359 *nz = nztot; 1360 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1361 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1362 PetscFunctionReturn(0); 1363 } 1364 1365 #undef __FUNC__ 1366 #define __FUNC__ "MatRestoreRow_MPIBAIJ" 1367 int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1368 { 1369 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1370 1371 PetscFunctionBegin; 1372 if (baij->getrowactive == PETSC_FALSE) { 1373 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called"); 1374 } 1375 baij->getrowactive = PETSC_FALSE; 1376 PetscFunctionReturn(0); 1377 } 1378 1379 #undef __FUNC__ 1380 #define __FUNC__ "MatGetBlockSize_MPIBAIJ" 1381 int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs) 1382 { 1383 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1384 1385 PetscFunctionBegin; 1386 *bs = baij->bs; 1387 PetscFunctionReturn(0); 1388 } 1389 1390 #undef __FUNC__ 1391 #define __FUNC__ "MatZeroEntries_MPIBAIJ" 1392 int MatZeroEntries_MPIBAIJ(Mat A) 1393 { 1394 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 1395 int ierr; 1396 1397 PetscFunctionBegin; 1398 ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 1399 ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 1400 PetscFunctionReturn(0); 1401 } 1402 1403 #undef __FUNC__ 1404 #define __FUNC__ "MatGetInfo_MPIBAIJ" 1405 int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1406 { 1407 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) matin->data; 1408 Mat A = a->A, B = a->B; 1409 int ierr; 1410 double isend[5], irecv[5]; 1411 1412 PetscFunctionBegin; 1413 info->block_size = (double)a->bs; 1414 ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr); 1415 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->memory; 1416 ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr); 1417 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->memory; 1418 if (flag == MAT_LOCAL) { 1419 info->nz_used = isend[0]; 1420 info->nz_allocated = isend[1]; 1421 info->nz_unneeded = isend[2]; 1422 info->memory = isend[3]; 1423 info->mallocs = isend[4]; 1424 } else if (flag == MAT_GLOBAL_MAX) { 1425 ierr = MPI_Allreduce(isend,irecv,5,MPI_INT,MPI_MAX,matin->comm);CHKERRQ(ierr); 1426 info->nz_used = irecv[0]; 1427 info->nz_allocated = irecv[1]; 1428 info->nz_unneeded = irecv[2]; 1429 info->memory = irecv[3]; 1430 info->mallocs = irecv[4]; 1431 } else if (flag == MAT_GLOBAL_SUM) { 1432 ierr = MPI_Allreduce(isend,irecv,5,MPI_INT,MPI_SUM,matin->comm);CHKERRQ(ierr); 1433 info->nz_used = irecv[0]; 1434 info->nz_allocated = irecv[1]; 1435 info->nz_unneeded = irecv[2]; 1436 info->memory = irecv[3]; 1437 info->mallocs = irecv[4]; 1438 } 1439 info->rows_global = (double)a->M; 1440 info->columns_global = (double)a->N; 1441 info->rows_local = (double)a->m; 1442 info->columns_local = (double)a->N; 1443 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1444 info->fill_ratio_needed = 0; 1445 info->factor_mallocs = 0; 1446 PetscFunctionReturn(0); 1447 } 1448 1449 #undef __FUNC__ 1450 #define __FUNC__ "MatSetOption_MPIBAIJ" 1451 int MatSetOption_MPIBAIJ(Mat A,MatOption op) 1452 { 1453 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1454 1455 PetscFunctionBegin; 1456 if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 1457 op == MAT_YES_NEW_NONZERO_LOCATIONS || 1458 op == MAT_COLUMNS_UNSORTED || 1459 op == MAT_COLUMNS_SORTED || 1460 op == MAT_NEW_NONZERO_ALLOCATION_ERROR || 1461 op == MAT_NEW_NONZERO_LOCATION_ERROR) { 1462 MatSetOption(a->A,op); 1463 MatSetOption(a->B,op); 1464 } else if (op == MAT_ROW_ORIENTED) { 1465 a->roworiented = 1; 1466 MatSetOption(a->A,op); 1467 MatSetOption(a->B,op); 1468 } else if (op == MAT_ROWS_SORTED || 1469 op == MAT_ROWS_UNSORTED || 1470 op == MAT_SYMMETRIC || 1471 op == MAT_STRUCTURALLY_SYMMETRIC || 1472 op == MAT_YES_NEW_DIAGONALS || 1473 op == MAT_USE_HASH_TABLE) 1474 PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 1475 else if (op == MAT_COLUMN_ORIENTED) { 1476 a->roworiented = 0; 1477 MatSetOption(a->A,op); 1478 MatSetOption(a->B,op); 1479 } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 1480 a->donotstash = 1; 1481 } else if (op == MAT_NO_NEW_DIAGONALS) { 1482 SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 1483 } else if (op == MAT_USE_HASH_TABLE) { 1484 a->ht_flag = 1; 1485 } else { 1486 SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 1487 } 1488 PetscFunctionReturn(0); 1489 } 1490 1491 #undef __FUNC__ 1492 #define __FUNC__ "MatTranspose_MPIBAIJ(" 1493 int MatTranspose_MPIBAIJ(Mat A,Mat *matout) 1494 { 1495 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data; 1496 Mat_SeqBAIJ *Aloc; 1497 Mat B; 1498 int ierr,M=baij->M,N=baij->N,*ai,*aj,i,*rvals,j,k,col; 1499 int bs=baij->bs,mbs=baij->mbs; 1500 Scalar *a; 1501 1502 PetscFunctionBegin; 1503 if (matout == PETSC_NULL && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place"); 1504 ierr = MatCreateMPIBAIJ(A->comm,baij->bs,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,PETSC_NULL,&B); 1505 CHKERRQ(ierr); 1506 1507 /* copy over the A part */ 1508 Aloc = (Mat_SeqBAIJ*) baij->A->data; 1509 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1510 rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals); 1511 1512 for ( i=0; i<mbs; i++ ) { 1513 rvals[0] = bs*(baij->rstart + i); 1514 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 1515 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1516 col = (baij->cstart+aj[j])*bs; 1517 for (k=0; k<bs; k++ ) { 1518 ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1519 col++; a += bs; 1520 } 1521 } 1522 } 1523 /* copy over the B part */ 1524 Aloc = (Mat_SeqBAIJ*) baij->B->data; 1525 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1526 for ( i=0; i<mbs; i++ ) { 1527 rvals[0] = bs*(baij->rstart + i); 1528 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 1529 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1530 col = baij->garray[aj[j]]*bs; 1531 for (k=0; k<bs; k++ ) { 1532 ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1533 col++; a += bs; 1534 } 1535 } 1536 } 1537 PetscFree(rvals); 1538 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1539 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1540 1541 if (matout != PETSC_NULL) { 1542 *matout = B; 1543 } else { 1544 PetscOps *Abops; 1545 struct _MatOps *Aops; 1546 1547 /* This isn't really an in-place transpose .... but free data structures from baij */ 1548 PetscFree(baij->rowners); 1549 ierr = MatDestroy(baij->A); CHKERRQ(ierr); 1550 ierr = MatDestroy(baij->B); CHKERRQ(ierr); 1551 if (baij->colmap) PetscFree(baij->colmap); 1552 if (baij->garray) PetscFree(baij->garray); 1553 if (baij->lvec) VecDestroy(baij->lvec); 1554 if (baij->Mvctx) VecScatterDestroy(baij->Mvctx); 1555 PetscFree(baij); 1556 1557 /* 1558 This is horrible, horrible code. We need to keep the 1559 A pointers for the bops and ops but copy everything 1560 else from C. 1561 */ 1562 Abops = A->bops; 1563 Aops = A->ops; 1564 PetscMemcpy(A,B,sizeof(struct _p_Mat)); 1565 A->bops = Abops; 1566 A->ops = Aops; 1567 1568 PetscHeaderDestroy(B); 1569 } 1570 PetscFunctionReturn(0); 1571 } 1572 1573 #undef __FUNC__ 1574 #define __FUNC__ "MatDiagonalScale_MPIBAIJ" 1575 int MatDiagonalScale_MPIBAIJ(Mat A,Vec ll,Vec rr) 1576 { 1577 Mat a = ((Mat_MPIBAIJ *) A->data)->A; 1578 Mat b = ((Mat_MPIBAIJ *) A->data)->B; 1579 int ierr,s1,s2,s3; 1580 1581 PetscFunctionBegin; 1582 if (ll) { 1583 ierr = VecGetLocalSize(ll,&s1); CHKERRQ(ierr); 1584 ierr = MatGetLocalSize(A,&s2,&s3); CHKERRQ(ierr); 1585 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"non-conforming local sizes"); 1586 ierr = MatDiagonalScale(a,ll,0); CHKERRQ(ierr); 1587 ierr = MatDiagonalScale(b,ll,0); CHKERRQ(ierr); 1588 } 1589 if (rr) SETERRQ(PETSC_ERR_SUP,0,"not supported for right vector"); 1590 PetscFunctionReturn(0); 1591 } 1592 1593 #undef __FUNC__ 1594 #define __FUNC__ "MatZeroRows_MPIBAIJ" 1595 int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag) 1596 { 1597 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 1598 int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 1599 int *procs,*nprocs,j,found,idx,nsends,*work,row; 1600 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 1601 int *rvalues,tag = A->tag,count,base,slen,n,*source; 1602 int *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs; 1603 MPI_Comm comm = A->comm; 1604 MPI_Request *send_waits,*recv_waits; 1605 MPI_Status recv_status,*send_status; 1606 IS istmp; 1607 PetscTruth localdiag; 1608 1609 PetscFunctionBegin; 1610 ierr = ISGetSize(is,&N); CHKERRQ(ierr); 1611 ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 1612 1613 /* first count number of contributors to each processor */ 1614 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 1615 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 1616 owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 1617 for ( i=0; i<N; i++ ) { 1618 idx = rows[i]; 1619 found = 0; 1620 for ( j=0; j<size; j++ ) { 1621 if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 1622 nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 1623 } 1624 } 1625 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range"); 1626 } 1627 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 1628 1629 /* inform other processors of number of messages and max length*/ 1630 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 1631 ierr = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 1632 nrecvs = work[rank]; 1633 ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 1634 nmax = work[rank]; 1635 PetscFree(work); 1636 1637 /* post receives: */ 1638 rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); CHKPTRQ(rvalues); 1639 recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 1640 for ( i=0; i<nrecvs; i++ ) { 1641 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 1642 } 1643 1644 /* do sends: 1645 1) starts[i] gives the starting index in svalues for stuff going to 1646 the ith processor 1647 */ 1648 svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 1649 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 1650 starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 1651 starts[0] = 0; 1652 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 1653 for ( i=0; i<N; i++ ) { 1654 svalues[starts[owner[i]]++] = rows[i]; 1655 } 1656 ISRestoreIndices(is,&rows); 1657 1658 starts[0] = 0; 1659 for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 1660 count = 0; 1661 for ( i=0; i<size; i++ ) { 1662 if (procs[i]) { 1663 ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1664 } 1665 } 1666 PetscFree(starts); 1667 1668 base = owners[rank]*bs; 1669 1670 /* wait on receives */ 1671 lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 1672 source = lens + nrecvs; 1673 count = nrecvs; slen = 0; 1674 while (count) { 1675 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1676 /* unpack receives into our local space */ 1677 ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 1678 source[imdex] = recv_status.MPI_SOURCE; 1679 lens[imdex] = n; 1680 slen += n; 1681 count--; 1682 } 1683 PetscFree(recv_waits); 1684 1685 /* move the data into the send scatter */ 1686 lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 1687 count = 0; 1688 for ( i=0; i<nrecvs; i++ ) { 1689 values = rvalues + i*nmax; 1690 for ( j=0; j<lens[i]; j++ ) { 1691 lrows[count++] = values[j] - base; 1692 } 1693 } 1694 PetscFree(rvalues); PetscFree(lens); 1695 PetscFree(owner); PetscFree(nprocs); 1696 1697 /* actually zap the local rows */ 1698 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 1699 PLogObjectParent(A,istmp); 1700 1701 /* 1702 Zero the required rows. If the "diagonal block" of the matrix 1703 is square and the user wishes to set the diagonal we use seperate 1704 code so that MatSetValues() is not called for each diagonal allocating 1705 new memory, thus calling lots of mallocs and slowing things down. 1706 1707 Contributed by: Mathew Knepley 1708 */ 1709 localdiag = PETSC_FALSE; 1710 if (diag && (l->A->M == l->A->N)) { 1711 localdiag = PETSC_TRUE; 1712 ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 1713 } else { 1714 ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr); 1715 } 1716 ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 1717 ierr = ISDestroy(istmp); CHKERRQ(ierr); 1718 1719 if (diag && (localdiag == PETSC_FALSE)) { 1720 for ( i = 0; i < slen; i++ ) { 1721 row = lrows[i] + rstart_bs; 1722 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES); CHKERRQ(ierr); 1723 } 1724 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1725 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1726 } 1727 PetscFree(lrows); 1728 1729 /* wait on sends */ 1730 if (nsends) { 1731 send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 1732 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1733 PetscFree(send_status); 1734 } 1735 PetscFree(send_waits); PetscFree(svalues); 1736 1737 PetscFunctionReturn(0); 1738 } 1739 1740 extern int MatPrintHelp_SeqBAIJ(Mat); 1741 #undef __FUNC__ 1742 #define __FUNC__ "MatPrintHelp_MPIBAIJ" 1743 int MatPrintHelp_MPIBAIJ(Mat A) 1744 { 1745 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 1746 MPI_Comm comm = A->comm; 1747 static int called = 0; 1748 int ierr; 1749 1750 PetscFunctionBegin; 1751 if (!a->rank) { 1752 ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr); 1753 } 1754 if (called) {PetscFunctionReturn(0);} else called = 1; 1755 (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n"); 1756 (*PetscHelpPrintf)(comm," -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n"); 1757 PetscFunctionReturn(0); 1758 } 1759 1760 #undef __FUNC__ 1761 #define __FUNC__ "MatSetUnfactored_MPIBAIJ" 1762 int MatSetUnfactored_MPIBAIJ(Mat A) 1763 { 1764 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 1765 int ierr; 1766 1767 PetscFunctionBegin; 1768 ierr = MatSetUnfactored(a->A); CHKERRQ(ierr); 1769 PetscFunctionReturn(0); 1770 } 1771 1772 static int MatConvertSameType_MPIBAIJ(Mat,Mat *,int); 1773 1774 /* -------------------------------------------------------------------*/ 1775 static struct _MatOps MatOps = { 1776 MatSetValues_MPIBAIJ,MatGetRow_MPIBAIJ,MatRestoreRow_MPIBAIJ,MatMult_MPIBAIJ, 1777 MatMultAdd_MPIBAIJ,MatMultTrans_MPIBAIJ,MatMultTransAdd_MPIBAIJ,0, 1778 0,0,0,0, 1779 0,0,MatTranspose_MPIBAIJ,MatGetInfo_MPIBAIJ, 1780 0,MatGetDiagonal_MPIBAIJ,MatDiagonalScale_MPIBAIJ,MatNorm_MPIBAIJ, 1781 MatAssemblyBegin_MPIBAIJ,MatAssemblyEnd_MPIBAIJ,0,MatSetOption_MPIBAIJ, 1782 MatZeroEntries_MPIBAIJ,MatZeroRows_MPIBAIJ,0, 1783 0,0,0,MatGetSize_MPIBAIJ, 1784 MatGetLocalSize_MPIBAIJ,MatGetOwnershipRange_MPIBAIJ,0,0, 1785 0,0,MatConvertSameType_MPIBAIJ,0,0, 1786 0,0,0,MatGetSubMatrices_MPIBAIJ, 1787 MatIncreaseOverlap_MPIBAIJ,MatGetValues_MPIBAIJ,0,MatPrintHelp_MPIBAIJ, 1788 MatScale_MPIBAIJ,0,0,0,MatGetBlockSize_MPIBAIJ, 1789 0,0,0,0,0,0,MatSetUnfactored_MPIBAIJ,0,MatSetValuesBlocked_MPIBAIJ}; 1790 1791 1792 #undef __FUNC__ 1793 #define __FUNC__ "MatCreateMPIBAIJ" 1794 /*@C 1795 MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 1796 (block compressed row). For good matrix assembly performance 1797 the user should preallocate the matrix storage by setting the parameters 1798 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1799 performance can be increased by more than a factor of 50. 1800 1801 Collective on MPI_Comm 1802 1803 Input Parameters: 1804 + comm - MPI communicator 1805 . bs - size of blockk 1806 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1807 This value should be the same as the local size used in creating the 1808 y vector for the matrix-vector product y = Ax. 1809 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1810 This value should be the same as the local size used in creating the 1811 x vector for the matrix-vector product y = Ax. 1812 . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 1813 . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 1814 . d_nz - number of block nonzeros per block row in diagonal portion of local 1815 submatrix (same for all local rows) 1816 . d_nzz - array containing the number of block nonzeros in the various block rows 1817 of the in diagonal portion of the local (possibly different for each block 1818 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 1819 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1820 submatrix (same for all local rows). 1821 - o_nzz - array containing the number of nonzeros in the various block rows of the 1822 off-diagonal portion of the local submatrix (possibly different for 1823 each block row) or PETSC_NULL. 1824 1825 Output Parameter: 1826 . A - the matrix 1827 1828 Options Database Keys: 1829 . -mat_no_unroll - uses code that does not unroll the loops in the 1830 block calculations (much slower) 1831 . -mat_block_size - size of the blocks to use 1832 1833 Notes: 1834 The user MUST specify either the local or global matrix dimensions 1835 (possibly both). 1836 1837 Storage Information: 1838 For a square global matrix we define each processor's diagonal portion 1839 to be its local rows and the corresponding columns (a square submatrix); 1840 each processor's off-diagonal portion encompasses the remainder of the 1841 local matrix (a rectangular submatrix). 1842 1843 The user can specify preallocated storage for the diagonal part of 1844 the local submatrix with either d_nz or d_nnz (not both). Set 1845 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1846 memory allocation. Likewise, specify preallocated storage for the 1847 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1848 1849 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1850 the figure below we depict these three local rows and all columns (0-11). 1851 1852 .vb 1853 0 1 2 3 4 5 6 7 8 9 10 11 1854 ------------------- 1855 row 3 | o o o d d d o o o o o o 1856 row 4 | o o o d d d o o o o o o 1857 row 5 | o o o d d d o o o o o o 1858 ------------------- 1859 .ve 1860 1861 Thus, any entries in the d locations are stored in the d (diagonal) 1862 submatrix, and any entries in the o locations are stored in the 1863 o (off-diagonal) submatrix. Note that the d and the o submatrices are 1864 stored simply in the MATSEQBAIJ format for compressed row storage. 1865 1866 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 1867 and o_nz should indicate the number of block nonzeros per row in the o matrix. 1868 In general, for PDE problems in which most nonzeros are near the diagonal, 1869 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1870 or you will get TERRIBLE performance; see the users' manual chapter on 1871 matrices. 1872 1873 .keywords: matrix, block, aij, compressed row, sparse, parallel 1874 1875 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIAIJ() 1876 @*/ 1877 int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N, 1878 int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 1879 { 1880 Mat B; 1881 Mat_MPIBAIJ *b; 1882 int ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size,flg; 1883 1884 PetscFunctionBegin; 1885 if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid block size specified, must be positive"); 1886 1887 MPI_Comm_size(comm,&size); 1888 if (size == 1) { 1889 if (M == PETSC_DECIDE) M = m; 1890 if (N == PETSC_DECIDE) N = n; 1891 ierr = MatCreateSeqBAIJ(comm,bs,M,N,d_nz,d_nnz,A); CHKERRQ(ierr); 1892 PetscFunctionReturn(0); 1893 } 1894 1895 *A = 0; 1896 PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,comm,MatDestroy,MatView); 1897 PLogObjectCreate(B); 1898 B->data = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b); 1899 PetscMemzero(b,sizeof(Mat_MPIBAIJ)); 1900 PetscMemcpy(B->ops,&MatOps,sizeof(struct _MatOps)); 1901 1902 B->ops->destroy = MatDestroy_MPIBAIJ; 1903 B->ops->view = MatView_MPIBAIJ; 1904 B->mapping = 0; 1905 B->factor = 0; 1906 B->assembled = PETSC_FALSE; 1907 1908 B->insertmode = NOT_SET_VALUES; 1909 MPI_Comm_rank(comm,&b->rank); 1910 MPI_Comm_size(comm,&b->size); 1911 1912 if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) { 1913 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 1914 } 1915 if ( M == PETSC_DECIDE && m == PETSC_DECIDE) { 1916 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either M or m should be specified"); 1917 } 1918 if ( N == PETSC_DECIDE && n == PETSC_DECIDE) { 1919 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either N or n should be specified"); 1920 } 1921 if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE; 1922 if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE; 1923 1924 if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 1925 work[0] = m; work[1] = n; 1926 mbs = m/bs; nbs = n/bs; 1927 ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr); 1928 if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;} 1929 if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;} 1930 } 1931 if (m == PETSC_DECIDE) { 1932 Mbs = M/bs; 1933 if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global rows must be divisible by blocksize"); 1934 mbs = Mbs/b->size + ((Mbs % b->size) > b->rank); 1935 m = mbs*bs; 1936 } 1937 if (n == PETSC_DECIDE) { 1938 Nbs = N/bs; 1939 if (Nbs*bs != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global cols must be divisible by blocksize"); 1940 nbs = Nbs/b->size + ((Nbs % b->size) > b->rank); 1941 n = nbs*bs; 1942 } 1943 if (mbs*bs != m || nbs*bs != n) { 1944 SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of local rows, cols must be divisible by blocksize"); 1945 } 1946 1947 b->m = m; B->m = m; 1948 b->n = n; B->n = n; 1949 b->N = N; B->N = N; 1950 b->M = M; B->M = M; 1951 b->bs = bs; 1952 b->bs2 = bs*bs; 1953 b->mbs = mbs; 1954 b->nbs = nbs; 1955 b->Mbs = Mbs; 1956 b->Nbs = Nbs; 1957 1958 /* build local table of row and column ownerships */ 1959 b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners); 1960 PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 1961 b->cowners = b->rowners + b->size + 2; 1962 ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1963 b->rowners[0] = 0; 1964 for ( i=2; i<=b->size; i++ ) { 1965 b->rowners[i] += b->rowners[i-1]; 1966 } 1967 b->rstart = b->rowners[b->rank]; 1968 b->rend = b->rowners[b->rank+1]; 1969 b->rstart_bs = b->rstart * bs; 1970 b->rend_bs = b->rend * bs; 1971 1972 ierr = MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1973 b->cowners[0] = 0; 1974 for ( i=2; i<=b->size; i++ ) { 1975 b->cowners[i] += b->cowners[i-1]; 1976 } 1977 b->cstart = b->cowners[b->rank]; 1978 b->cend = b->cowners[b->rank+1]; 1979 b->cstart_bs = b->cstart * bs; 1980 b->cend_bs = b->cend * bs; 1981 1982 1983 if (d_nz == PETSC_DEFAULT) d_nz = 5; 1984 ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr); 1985 PLogObjectParent(B,b->A); 1986 if (o_nz == PETSC_DEFAULT) o_nz = 0; 1987 ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr); 1988 PLogObjectParent(B,b->B); 1989 1990 /* build cache for off array entries formed */ 1991 ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr); 1992 b->donotstash = 0; 1993 b->colmap = 0; 1994 b->garray = 0; 1995 b->roworiented = 1; 1996 1997 /* stuff used in block assembly */ 1998 b->barray = 0; 1999 2000 /* stuff used for matrix vector multiply */ 2001 b->lvec = 0; 2002 b->Mvctx = 0; 2003 2004 /* stuff for MatGetRow() */ 2005 b->rowindices = 0; 2006 b->rowvalues = 0; 2007 b->getrowactive = PETSC_FALSE; 2008 2009 /* hash table stuff */ 2010 b->ht = 0; 2011 b->hd = 0; 2012 b->ht_size = 0; 2013 b->ht_flag = 0; 2014 b->ht_fact = 0; 2015 b->ht_total_ct = 0; 2016 b->ht_insert_ct = 0; 2017 2018 *A = B; 2019 ierr = OptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg); CHKERRQ(ierr); 2020 if (flg) { 2021 double fact = 1.39; 2022 ierr = MatSetOption(B,MAT_USE_HASH_TABLE); CHKERRQ(ierr); 2023 ierr = OptionsGetDouble(PETSC_NULL,"-mat_use_hash_table",&fact,&flg); CHKERRQ(ierr); 2024 if (fact <= 1.0) fact = 1.39; 2025 ierr = MatMPIBAIJSetHashTableFactor(B,fact); CHKERRQ(ierr); 2026 PLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact); 2027 } 2028 PetscFunctionReturn(0); 2029 } 2030 2031 #undef __FUNC__ 2032 #define __FUNC__ "MatConvertSameType_MPIBAIJ" 2033 static int MatConvertSameType_MPIBAIJ(Mat matin,Mat *newmat,int cpvalues) 2034 { 2035 Mat mat; 2036 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data; 2037 int ierr, len=0, flg; 2038 2039 PetscFunctionBegin; 2040 *newmat = 0; 2041 PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,matin->comm,MatDestroy,MatView); 2042 PLogObjectCreate(mat); 2043 mat->data = (void *) (a = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(a); 2044 PetscMemcpy(mat->ops,&MatOps,sizeof(struct _MatOps)); 2045 mat->ops->destroy = MatDestroy_MPIBAIJ; 2046 mat->ops->view = MatView_MPIBAIJ; 2047 mat->factor = matin->factor; 2048 mat->assembled = PETSC_TRUE; 2049 2050 a->m = mat->m = oldmat->m; 2051 a->n = mat->n = oldmat->n; 2052 a->M = mat->M = oldmat->M; 2053 a->N = mat->N = oldmat->N; 2054 2055 a->bs = oldmat->bs; 2056 a->bs2 = oldmat->bs2; 2057 a->mbs = oldmat->mbs; 2058 a->nbs = oldmat->nbs; 2059 a->Mbs = oldmat->Mbs; 2060 a->Nbs = oldmat->Nbs; 2061 2062 a->rstart = oldmat->rstart; 2063 a->rend = oldmat->rend; 2064 a->cstart = oldmat->cstart; 2065 a->cend = oldmat->cend; 2066 a->size = oldmat->size; 2067 a->rank = oldmat->rank; 2068 mat->insertmode = NOT_SET_VALUES; 2069 a->rowvalues = 0; 2070 a->getrowactive = PETSC_FALSE; 2071 a->barray = 0; 2072 2073 /* hash table stuff */ 2074 a->ht = 0; 2075 a->hd = 0; 2076 a->ht_size = 0; 2077 a->ht_flag = oldmat->ht_flag; 2078 a->ht_fact = oldmat->ht_fact; 2079 a->ht_total_ct = 0; 2080 a->ht_insert_ct = 0; 2081 2082 2083 a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 2084 PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 2085 a->cowners = a->rowners + a->size + 2; 2086 PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int)); 2087 ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr); 2088 if (oldmat->colmap) { 2089 a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap); 2090 PLogObjectMemory(mat,(a->Nbs)*sizeof(int)); 2091 PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int)); 2092 } else a->colmap = 0; 2093 if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) { 2094 a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray); 2095 PLogObjectMemory(mat,len*sizeof(int)); 2096 PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); 2097 } else a->garray = 0; 2098 2099 ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 2100 PLogObjectParent(mat,a->lvec); 2101 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 2102 PLogObjectParent(mat,a->Mvctx); 2103 ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr); 2104 PLogObjectParent(mat,a->A); 2105 ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr); 2106 PLogObjectParent(mat,a->B); 2107 ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr); 2108 if (flg) { 2109 ierr = MatPrintHelp(mat); CHKERRQ(ierr); 2110 } 2111 *newmat = mat; 2112 PetscFunctionReturn(0); 2113 } 2114 2115 #include "sys.h" 2116 2117 #undef __FUNC__ 2118 #define __FUNC__ "MatLoad_MPIBAIJ" 2119 int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat) 2120 { 2121 Mat A; 2122 int i, nz, ierr, j,rstart, rend, fd; 2123 Scalar *vals,*buf; 2124 MPI_Comm comm = ((PetscObject)viewer)->comm; 2125 MPI_Status status; 2126 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 2127 int *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf; 2128 int flg,tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows; 2129 int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2130 int dcount,kmax,k,nzcount,tmp; 2131 2132 PetscFunctionBegin; 2133 ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr); 2134 2135 MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 2136 if (!rank) { 2137 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 2138 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr); 2139 if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object"); 2140 if (header[3] < 0) { 2141 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as MPIBAIJ"); 2142 } 2143 } 2144 2145 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 2146 M = header[1]; N = header[2]; 2147 2148 if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices"); 2149 2150 /* 2151 This code adds extra rows to make sure the number of rows is 2152 divisible by the blocksize 2153 */ 2154 Mbs = M/bs; 2155 extra_rows = bs - M + bs*(Mbs); 2156 if (extra_rows == bs) extra_rows = 0; 2157 else Mbs++; 2158 if (extra_rows &&!rank) { 2159 PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n"); 2160 } 2161 2162 /* determine ownership of all rows */ 2163 mbs = Mbs/size + ((Mbs % size) > rank); 2164 m = mbs * bs; 2165 rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int)); CHKPTRQ(rowners); 2166 browners = rowners + size + 1; 2167 ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2168 rowners[0] = 0; 2169 for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1]; 2170 for ( i=0; i<=size; i++ ) browners[i] = rowners[i]*bs; 2171 rstart = rowners[rank]; 2172 rend = rowners[rank+1]; 2173 2174 /* distribute row lengths to all processors */ 2175 locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) ); CHKPTRQ(locrowlens); 2176 if (!rank) { 2177 rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) ); CHKPTRQ(rowlengths); 2178 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr); 2179 for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 2180 sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 2181 for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i]; 2182 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2183 PetscFree(sndcounts); 2184 } else { 2185 ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm);CHKERRQ(ierr); 2186 } 2187 2188 if (!rank) { 2189 /* calculate the number of nonzeros on each processor */ 2190 procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 2191 PetscMemzero(procsnz,size*sizeof(int)); 2192 for ( i=0; i<size; i++ ) { 2193 for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) { 2194 procsnz[i] += rowlengths[j]; 2195 } 2196 } 2197 PetscFree(rowlengths); 2198 2199 /* determine max buffer needed and allocate it */ 2200 maxnz = 0; 2201 for ( i=0; i<size; i++ ) { 2202 maxnz = PetscMax(maxnz,procsnz[i]); 2203 } 2204 cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 2205 2206 /* read in my part of the matrix column indices */ 2207 nz = procsnz[0]; 2208 ibuf = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf); 2209 mycols = ibuf; 2210 if (size == 1) nz -= extra_rows; 2211 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr); 2212 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2213 2214 /* read in every ones (except the last) and ship off */ 2215 for ( i=1; i<size-1; i++ ) { 2216 nz = procsnz[i]; 2217 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr); 2218 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 2219 } 2220 /* read in the stuff for the last proc */ 2221 if ( size != 1 ) { 2222 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2223 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr); 2224 for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i; 2225 ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr); 2226 } 2227 PetscFree(cols); 2228 } else { 2229 /* determine buffer space needed for message */ 2230 nz = 0; 2231 for ( i=0; i<m; i++ ) { 2232 nz += locrowlens[i]; 2233 } 2234 ibuf = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf); 2235 mycols = ibuf; 2236 /* receive message of column indices*/ 2237 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2238 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2239 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 2240 } 2241 2242 /* loop over local rows, determining number of off diagonal entries */ 2243 dlens = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) ); CHKPTRQ(dlens); 2244 odlens = dlens + (rend-rstart); 2245 mask = (int *) PetscMalloc( 3*Mbs*sizeof(int) ); CHKPTRQ(mask); 2246 PetscMemzero(mask,3*Mbs*sizeof(int)); 2247 masked1 = mask + Mbs; 2248 masked2 = masked1 + Mbs; 2249 rowcount = 0; nzcount = 0; 2250 for ( i=0; i<mbs; i++ ) { 2251 dcount = 0; 2252 odcount = 0; 2253 for ( j=0; j<bs; j++ ) { 2254 kmax = locrowlens[rowcount]; 2255 for ( k=0; k<kmax; k++ ) { 2256 tmp = mycols[nzcount++]/bs; 2257 if (!mask[tmp]) { 2258 mask[tmp] = 1; 2259 if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp; 2260 else masked1[dcount++] = tmp; 2261 } 2262 } 2263 rowcount++; 2264 } 2265 2266 dlens[i] = dcount; 2267 odlens[i] = odcount; 2268 2269 /* zero out the mask elements we set */ 2270 for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0; 2271 for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0; 2272 } 2273 2274 /* create our matrix */ 2275 ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat); 2276 CHKERRQ(ierr); 2277 A = *newmat; 2278 MatSetOption(A,MAT_COLUMNS_SORTED); 2279 2280 if (!rank) { 2281 buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(buf); 2282 /* read in my part of the matrix numerical values */ 2283 nz = procsnz[0]; 2284 vals = buf; 2285 mycols = ibuf; 2286 if (size == 1) nz -= extra_rows; 2287 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 2288 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2289 2290 /* insert into matrix */ 2291 jj = rstart*bs; 2292 for ( i=0; i<m; i++ ) { 2293 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2294 mycols += locrowlens[i]; 2295 vals += locrowlens[i]; 2296 jj++; 2297 } 2298 /* read in other processors (except the last one) and ship out */ 2299 for ( i=1; i<size-1; i++ ) { 2300 nz = procsnz[i]; 2301 vals = buf; 2302 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 2303 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2304 } 2305 /* the last proc */ 2306 if ( size != 1 ){ 2307 nz = procsnz[i] - extra_rows; 2308 vals = buf; 2309 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 2310 for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0; 2311 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 2312 } 2313 PetscFree(procsnz); 2314 } else { 2315 /* receive numeric values */ 2316 buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(buf); 2317 2318 /* receive message of values*/ 2319 vals = buf; 2320 mycols = ibuf; 2321 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2322 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2323 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 2324 2325 /* insert into matrix */ 2326 jj = rstart*bs; 2327 for ( i=0; i<m; i++ ) { 2328 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2329 mycols += locrowlens[i]; 2330 vals += locrowlens[i]; 2331 jj++; 2332 } 2333 } 2334 PetscFree(locrowlens); 2335 PetscFree(buf); 2336 PetscFree(ibuf); 2337 PetscFree(rowners); 2338 PetscFree(dlens); 2339 PetscFree(mask); 2340 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 2341 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 2342 PetscFunctionReturn(0); 2343 } 2344 2345 2346 2347 #undef __FUNC__ 2348 #define __FUNC__ "MatMPIBAIJSetHashTableFactor" 2349 /*@ 2350 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2351 2352 Input Parameters: 2353 . mat - the matrix 2354 . fact - factor 2355 2356 Collective on Mat 2357 2358 Notes: 2359 This can also be set by the command line option: -mat_use_hash_table fact 2360 2361 .keywords: matrix, hashtable, factor, HT 2362 2363 .seealso: MatSetOption() 2364 @*/ 2365 int MatMPIBAIJSetHashTableFactor(Mat mat,double fact) 2366 { 2367 Mat_MPIBAIJ *baij; 2368 2369 PetscFunctionBegin; 2370 PetscValidHeaderSpecific(mat,MAT_COOKIE); 2371 if (mat->type != MATMPIBAIJ) { 2372 SETERRQ(PETSC_ERR_ARG_WRONG,1,"Incorrect matrix type. Use MPIBAIJ only."); 2373 } 2374 baij = (Mat_MPIBAIJ*) mat->data; 2375 baij->ht_fact = fact; 2376 PetscFunctionReturn(0); 2377 } 2378