#ifdef PETSC_RCS_HEADER
static char vcid[] = "$Id: mpibaij.c,v 1.121 1998/05/11 21:55:16 bsmith Exp bsmith $";
#endif

#include "pinclude/pviewer.h"   /*I "mat.h" I*/
#include "src/mat/impls/baij/mpi/mpibaij.h"
#include "src/vec/vecimpl.h"

extern int MatSetUpMultiply_MPIBAIJ(Mat);
extern int DisAssemble_MPIBAIJ(Mat);
extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int);
extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatGetSubMatrixCall,Mat **);

/*
     Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non-scalable way, since the
   length of colmap equals the global matrix length.
*/
#undef __FUNC__
#define __FUNC__ "CreateColmap_MPIBAIJ_Private"
static int CreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data;
  int         nbs = B->nbs,i,bs=B->bs;

  PetscFunctionBegin;
  baij->colmap = (int *) PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap);
  PLogObjectMemory(mat,baij->Nbs*sizeof(int));
  PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));
  for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1;
  PetscFunctionReturn(0);
}
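/*
   A worked example of the colmap encoding (illustrative only): entries are
   stored shifted by one so that 0 can mean "this global block column is not
   present in the off-diagonal part".  With bs=2, if local off-diagonal block
   column 3 holds global block column 7, then colmap[7] = 3*2+1 = 7, and a
   global point column gcol recovers its local point column as

       lcol = baij->colmap[gcol/bs] - 1 + gcol%bs;

   A result lcol < 0 signals that the block column is absent.
*/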
#define CHUNKSIZE 10

#define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for ( _i=low; _i<high; _i++ ) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else                    *bap  = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
      Scalar *new_a; \
 \
      if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
 \
      /* malloc new storage space */ \
      len   = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); \
      new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
      new_j = (int *) (new_a + bs2*new_nz); \
      new_i = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} \
      for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
      PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); \
      len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
      PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, \
                  len*sizeof(int)); \
      PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); \
      PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \
      PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
                  aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar)); \
      /* free up old matrix storage */ \
      PetscFree(a->a); \
      if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
      aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \
      a->singlemalloc = 1; \
 \
      rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
      rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
      PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
      a->maxnz += bs2*CHUNKSIZE; \
      a->reallocs++; \
      a->nz++; \
    } \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    for ( ii=N; ii>=_i; ii-- ) { \
      rp[ii+1] = rp[ii]; \
      PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \
    } \
    if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar)); \
    rp[_i]                      = bcol; \
    ap[bs2*_i + bs*cidx + ridx] = value; \
    a_noinsert:; \
    ailen[brow] = nrow; \
}

#define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for ( _i=low; _i<high; _i++ ) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else                    *bap  = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int    new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
      Scalar *new_a; \
 \
      if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
 \
      /* malloc new storage space */ \
      len   = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(b->mbs+1)*sizeof(int); \
      new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
      new_j = (int *) (new_a + bs2*new_nz); \
      new_i = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = bi[ii];} \
      for ( ii=brow+1; ii<b->mbs+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
      PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int)); \
      len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
      PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow, \
                  len*sizeof(int)); \
      PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(Scalar)); \
      PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \
      PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
                  ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(Scalar)); \
      /* free up old matrix storage */ \
      PetscFree(b->a); \
      if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \
      ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \
      b->singlemalloc = 1; \
 \
      rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
      rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
      PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
      b->maxnz += bs2*CHUNKSIZE; \
      b->reallocs++; \
      b->nz++; \
    } \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    for ( ii=N; ii>=_i; ii-- ) { \
      rp[ii+1] = rp[ii]; \
      PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \
    } \
    if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar)); \
    rp[_i]                      = bcol; \
    ap[bs2*_i + bs*cidx + ridx] = value; \
    b_noinsert:; \
    bilen[brow] = nrow; \
}
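/*
   Both macros above locate a point entry (row,col) inside block storage by
   the same arithmetic; for example (illustrative), with bs=3 the point entry
   (row,col) = (7,5) lives in block row brow = 7/3 = 2, block column
   bcol = 5/3 = 1, at offsets ridx = 7%3 = 1 and cidx = 5%3 = 2 within the
   column-ordered bs*bs block, i.e. at ap[bs2*_i + bs*cidx + ridx] =
   ap[9*_i + 7].  The two macros differ only in operating on the diagonal
   (A) versus the off-diagonal (B) SeqBAIJ part.
*/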
#undef __FUNC__
#define __FUNC__ "MatSetValues_MPIBAIJ"
int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  Scalar      value;
  int         ierr,i,j,row,col;
  int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs;
  int         rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
  int         cend_orig=baij->cend_bs,bs=baij->bs;

  /* Some variables required by the macros */
  Mat         A = baij->A;
  Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) (A)->data;
  int         *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  Scalar      *aa=a->a;

  Mat         B = baij->B;
  Mat_SeqBAIJ *b = (Mat_SeqBAIJ *) (B)->data;
  int         *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  Scalar      *ba=b->a;

  int         *rp,ii,nrow,_i,rmax,N,brow,bcol;
  int         low,high,t,ridx,cidx,bs2=a->bs2;
  Scalar      *ap,*bap;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
#if defined(USE_PETSC_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;
      for ( j=0; j<n; j++ ) {
        if (in[j] >= cstart_orig && in[j] < cend_orig){
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
#if defined(USE_PETSC_BOPT_g)
        else if (in[j] < 0) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");}
        else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");}
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
            col = baij->colmap[in[j]/bs] - 1 + in[j]%bs;
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ *) (B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba=b->a;
            }
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (roworiented && !baij->donotstash) {
        ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
      } else {
        if (!baij->donotstash) {
          row = im[i];
          for ( j=0; j<n; j++ ) {
            ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
          }
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
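/*
   Typical call pattern (a sketch, not tied to any particular example):
   any process may set any global entry; rows outside [rstart_bs,rend_bs)
   are cached in the stash and shipped to their owners at assembly time.

       Scalar one = 1.0;
       int    gi = 0, gj = 0;    (global point indices, hypothetical)
       ierr = MatSetValues(mat,1,&gi,1,&gj,&one,ADD_VALUES);CHKERRQ(ierr);
       ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/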
extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);

#undef __FUNC__
#define __FUNC__ "MatSetValuesBlocked_MPIBAIJ"
int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  Scalar      *value,*barray=baij->barray;
  int         ierr,i,j,ii,jj,row,col,k,l;
  int         roworiented = baij->roworiented,rstart=baij->rstart;
  int         rend=baij->rend,cstart=baij->cstart,stepval;
  int         cend=baij->cend,bs=baij->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    baij->barray = barray = (Scalar*) PetscMalloc(bs2*sizeof(Scalar)); CHKPTRQ(barray);
  }

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for ( i=0; i<m; i++ ) {
#if defined(USE_PETSC_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for ( j=0; j<n; j++ ) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for ( ii=0; ii<bs; ii++,value+=stepval ) {
            for ( jj=0; jj<bs; jj++ ) {
              *barray++ = *value++;
            }
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend){
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
#if defined(USE_PETSC_BOPT_g)
        else if (in[j] < 0) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");}
        else if (in[j] >= baij->Nbs) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");}
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(USE_PETSC_BOPT_g)
            if ((baij->colmap[in[j]] - 1) % bs) {SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");}
#endif
            col = (baij->colmap[in[j]] - 1)/bs;
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
              col  = in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          row   = im[i]*bs;
          value = v + i*(stepval+bs)*bs;
          for ( j=0; j<bs; j++,row++ ) {
            for ( k=0; k<n; k++ ) {
              for ( col=in[k]*bs,l=0; l<bs; l++,col++ ) {
                ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr);
              }
            }
          }
        } else {
          for ( j=0; j<n; j++ ) {
            value = v + j*(stepval+bs)*bs + i*bs;
            col   = in[j]*bs;
            for ( k=0; k<bs; k++,col++,value+=stepval ) {
              for ( row = im[i]*bs,l=0; l<bs; l++,row++ ) {
                ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr);
              }
            }
          }
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#include <math.h>
#define HASH_KEY 0.6180339887
/* #define HASH1(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
/* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
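/*
   The multiplicative hash above uses the golden-ratio conjugate as the
   multiplier, following Knuth: h = floor(size * frac(key*0.6180339887)).
   A worked example (illustrative): for key = 7 and size = 11,
   7*0.6180339887 = 4.32623..., the fractional part is 0.32623..., and
   11*0.32623... = 3.58..., so HASH(11,7,tmp) evaluates to 3.
*/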
#undef __FUNC__
#define __FUNC__ "MatSetValues_MPIBAIJ_HT"
int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr,i,j,row,col;
  int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs;
  int         rend_orig=baij->rend_bs,Nbs=baij->Nbs;
  int         h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx;
  double      tmp;
  Scalar      **HD = baij->hd,value;
#if defined(USE_PETSC_BOPT_g)
  int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  for ( i=0; i<m; i++ ) {
#if defined(USE_PETSC_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for ( j=0; j<n; j++ ) {
        col = in[j];
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        /* Look up the block containing (row,col) in the hash table */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(USE_PETSC_BOPT_g)
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++ );
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++ );
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#else
        if (HT[idx] != key) {
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++ );
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++ );
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#endif
        /* A hash table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else {
      if (roworiented && !baij->donotstash) {
        ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
      } else {
        if (!baij->donotstash) {
          row = im[i];
          for ( j=0; j<n; j++ ) {
            ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
          }
        }
      }
    }
  }
#if defined(USE_PETSC_BOPT_g)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}
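/*
   Key encoding used by both hash-table paths (for illustration): a block
   entry at block row br and block column bc is keyed as br*Nbs + bc + 1,
   i.e. its position in a row-major sweep of the Nbs-wide block grid, shifted
   by one so that 0 can serve as the empty-slot marker.  Collisions are
   resolved by linear probing: scan forward from h1 to the end of the table,
   then wrap around to the front.
*/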
#undef __FUNC__
#define __FUNC__ "MatSetValuesBlocked_MPIBAIJ_HT"
int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr,i,j,ii,jj,row,col,k,l;
  int         roworiented = baij->roworiented,rstart=baij->rstart;
  int         rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2;
  int         h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  double      tmp;
  Scalar      **HD = baij->hd,*value,*v_t,*baij_a;
#if defined(USE_PETSC_BOPT_g)
  int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for ( i=0; i<m; i++ ) {
#if defined(USE_PETSC_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    row = im[i];
    v_t = v + i*bs2;
    if (row >= rstart && row < rend) {
      for ( j=0; j<n; j++ ) {
        col = in[j];

        /* Look up the block (row,col) in the hash table */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(USE_PETSC_BOPT_g)
        total_ct++;
        insert_ct++;
        if (HT[idx] != key) {
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++ );
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++ );
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#else
        if (HT[idx] != key) {
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++ );
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++ );
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#endif
        baij_a = HD[idx];
        if (roworiented) {
          /* value = v + i*(stepval+bs)*bs + j*bs; */
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          if (addv == ADD_VALUES) {
            for ( ii=0; ii<bs; ii++,value+=stepval ) {
              for ( jj=ii; jj<bs2; jj+=bs ) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for ( ii=0; ii<bs; ii++,value+=stepval ) {
              for ( jj=ii; jj<bs2; jj+=bs ) {
                baij_a[jj] = *value++;
              }
            }
          }
        } else {
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) {
              for ( jj=0; jj<bs; jj++ ) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) {
              for ( jj=0; jj<bs; jj++ ) {
                baij_a[jj] = *value++;
              }
            }
          }
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          row   = im[i]*bs;
          value = v + i*(stepval+bs)*bs;
          for ( j=0; j<bs; j++,row++ ) {
            for ( k=0; k<n; k++ ) {
              for ( col=in[k]*bs,l=0; l<bs; l++,col++ ) {
                ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr);
              }
            }
          }
        } else {
          for ( j=0; j<n; j++ ) {
            value = v + j*(stepval+bs)*bs + i*bs;
            col   = in[j]*bs;
            for ( k=0; k<bs; k++,col++,value+=stepval ) {
              for ( row = im[i]*bs,l=0; l<bs; l++,row++ ) {
                ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr);
              }
            }
          }
        }
      }
    }
  }
#if defined(USE_PETSC_BOPT_g)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetValues_MPIBAIJ"
int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
  int         bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
        if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
        if (idxn[j] >= bscstart && idxn[j] < bscend){
          col  = idxn[j] - bscstart;
          ierr = MatGetValues(baij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
          if ((baij->colmap[idxn[j]/bs]-1 < 0) ||
              (baij->garray[(baij->colmap[idxn[j]/bs]-1)/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = (baij->colmap[idxn[j]/bs]-1) + idxn[j]%bs;
            ierr = MatGetValues(baij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}
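/*
   Retrieval is local-only, in contrast to MatSetValues.  A sketch
   (hypothetical indices, which must lie in this process's own row range
   [rstart_bs,rend_bs)):

       Scalar val;
       int    gi = 0, gj = 0;
       ierr = MatGetValues(mat,1,&gi,1,&gj,&val);CHKERRQ(ierr);

   Requesting a row owned by another process raises an error rather than
   communicating.
*/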
#undef __FUNC__
#define __FUNC__ "MatNorm_MPIBAIJ"
int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data,*bmat = (Mat_SeqBAIJ*) baij->B->data;
  int         ierr,i,bs2=baij->bs2;
  double      sum = 0.0;
  Scalar      *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,norm); CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for ( i=0; i<amat->nz*bs2; i++ ) {
#if defined(USE_PETSC_COMPLEX)
        sum += real(conj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for ( i=0; i<bmat->nz*bs2; i++ ) {
#if defined(USE_PETSC_COMPLEX)
        sum += real(conj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr  = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet");
    }
  }
  PetscFunctionReturn(0);
}
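/*
   The Frobenius norm computed above is ||A||_F = sqrt( sum_ij |a_ij|^2 ):
   each process accumulates the squared moduli of its diagonal (A) and
   off-diagonal (B) blocks, and a single MPI_SUM reduction combines the
   partial sums before the square root is taken.
*/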
#undef __FUNC__
#define __FUNC__ "MatAssemblyBegin_MPIBAIJ"
int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  MPI_Comm    comm = mat->comm;
  int         size = baij->size,*owners = baij->rowners,bs=baij->bs;
  int         rank = baij->rank,tag = mat->tag,*owner,*starts,count,ierr;
  MPI_Request *send_waits,*recv_waits;
  int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
  InsertMode  addv;
  Scalar      *rvalues,*svalues;

  PetscFunctionBegin;
  if (baij->donotstash) {
    baij->svalues    = 0; baij->rvalues    = 0;
    baij->nsends     = 0; baij->nrecvs     = 0;
    baij->send_waits = 0; baij->recv_waits = 0;
    baij->rmax       = 0;
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner  = (int *) PetscMalloc( (baij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
  for ( i=0; i<baij->stash.n; i++ ) {
    idx = baij->stash.idx[i];
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; break;
      }
    }
  }
  nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i]; }

  /* inform other processors of number of messages and max length */
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  ierr = MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
  nreceives = work[rank];
  ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
  nmax = work[rank];
  PetscFree(work);

  /* post receives:
       1) each message will consist of ordered triples
     (global row index, global column index, value); the indices are stored
     as Scalars (doubles) to simplify the message passing.
       2) since we don't know how long each individual message is we
     allocate the largest needed buffer for each receive. Potentially
     this is a lot of wasted space.

     This could be done better.
  */
  rvalues    = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
  for ( i=0; i<nreceives; i++ ) {
    ierr = MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
       1) starts[i] gives the starting index in svalues for stuff going to
     the ith processor
  */
  svalues    = (Scalar *) PetscMalloc(3*(baij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
  starts     = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
  starts[0]  = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1]; }
  for ( i=0; i<baij->stash.n; i++ ) {
    svalues[3*starts[owner[i]]]       = (Scalar) baij->stash.idx[i];
    svalues[3*starts[owner[i]]+1]     = (Scalar) baij->stash.idy[i];
    svalues[3*(starts[owner[i]]++)+2] = baij->stash.array[i];
  }
  PetscFree(owner);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1]; }
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  PetscFree(starts); PetscFree(nprocs);

  /* Free cache space */
  PLogInfo(baij->A,"MatAssemblyBegin_MPIBAIJ:Number of off-processor values %d\n",baij->stash.n);
  ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr);

  baij->svalues    = svalues;    baij->rvalues    = rvalues;
  baij->nsends     = nsends;     baij->nrecvs     = nreceives;
  baij->send_waits = send_waits; baij->recv_waits = recv_waits;
  baij->rmax       = nmax;

  PetscFunctionReturn(0);
}
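/*
   Wire format sketch: each stashed entry travels as three consecutive
   Scalars, so an entry A(12,40) = 2.5 (illustrative indices) is packed as

       svalues[3*s]   = 12.0;    (global row, as a Scalar)
       svalues[3*s+1] = 40.0;    (global column, as a Scalar)
       svalues[3*s+2] = 2.5;     (the value itself)

   and the receiver converts the first two back with (int)PetscReal(...)
   in MatAssemblyEnd_MPIBAIJ().
*/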
/*
   Creates and initializes the hash table.  The table is created only once;
   if new entries need to be added to the matrix then the hash table has to
   be destroyed and recreated.
*/
#undef __FUNC__
#define __FUNC__ "MatCreateHashTable_MPIBAIJ_Private"
int MatCreateHashTable_MPIBAIJ_Private(Mat mat,double factor)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  Mat         A = baij->A,B=baij->B;
  Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
  int         i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  int         size,bs2=baij->bs2,rstart=baij->rstart;
  int         cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  int         *HT,key;
  Scalar      **HD;
  double      tmp;
#if defined(USE_PETSC_BOPT_g)
  int         ct=0,max=0;
#endif

  PetscFunctionBegin;
  baij->ht_size = (int)(factor*nz);
  size          = baij->ht_size;

  if (baij->ht) {
    PetscFunctionReturn(0);
  }

  /* Allocate Memory for the Hash Table */
  baij->hd = (Scalar**)PetscMalloc((size)*(sizeof(int)+sizeof(Scalar*))+1); CHKPTRQ(baij->hd);
  baij->ht = (int*)(baij->hd + size);
  HD       = baij->hd;
  HT       = baij->ht;

  PetscMemzero(HD,size*(sizeof(int)+sizeof(Scalar*)));

  /* Loop over A */
  for ( i=0; i<a->mbs; i++ ) {
    for ( j=ai[i]; j<ai[i+1]; j++ ) {
      row = i+rstart;
      col = aj[j]+cstart;

      key = row*Nbs + col + 1;
      h1  = HASH(size,key,tmp);
      for ( k=0; k<size; k++ ) {
        if (HT[(h1+k)%size] == 0) {
          HT[(h1+k)%size] = key;
          HD[(h1+k)%size] = a->a + j*bs2;
          break;
#if defined(USE_PETSC_BOPT_g)
        } else {
          ct++;
#endif
        }
      }
#if defined(USE_PETSC_BOPT_g)
      if (k > max) max = k;
#endif
    }
  }
  /* Loop over B */
  for ( i=0; i<b->mbs; i++ ) {
    for ( j=bi[i]; j<bi[i+1]; j++ ) {
      row = i+rstart;
      col = garray[bj[j]];
      key = row*Nbs + col + 1;
      h1  = HASH(size,key,tmp);
      for ( k=0; k<size; k++ ) {
        if (HT[(h1+k)%size] == 0) {
          HT[(h1+k)%size] = key;
          HD[(h1+k)%size] = b->a + j*bs2;
          break;
#if defined(USE_PETSC_BOPT_g)
        } else {
          ct++;
#endif
        }
      }
#if defined(USE_PETSC_BOPT_g)
      if (k > max) max = k;
#endif
    }
  }

  /* Print Summary */
#if defined(USE_PETSC_BOPT_g)
  for ( i=0,j=0; i<size; i++ ) {
    if (HT[i]) { j++; }
  }
  PLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n",
           (j == 0) ? 0.0 : ((double)(ct+j))/j,max);
#endif
  PetscFunctionReturn(0);
}
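/*
   Table sizing, by way of example (illustrative numbers only): with
   factor = 1.6 and nz = 1000 local block-nonzeros, the table gets 1600
   slots, a load factor of 1/1.6 = 0.625.  Larger factors trade memory for
   shorter linear probes; the BOPT=g summary above reports the average and
   maximum probe lengths actually observed.
*/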
#undef __FUNC__
#define __FUNC__ "MatAssemblyEnd_MPIBAIJ"
int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  MPI_Status  *send_status,recv_status;
  int         imdex,nrecvs = baij->nrecvs,count = nrecvs,i,n,ierr;
  int         bs=baij->bs,row,col,other_disassembled;
  Scalar      *values,val;
  InsertMode  addv = mat->insertmode;

  PetscFunctionBegin;
  /* wait on receives */
  while (count) {
    ierr = MPI_Waitany(nrecvs,baij->recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    values = baij->rvalues + 3*imdex*baij->rmax;
    ierr   = MPI_Get_count(&recv_status,MPIU_SCALAR,&n);CHKERRQ(ierr);
    n      = n/3;
    for ( i=0; i<n; i++ ) {
      row = (int) PetscReal(values[3*i]) - baij->rstart*bs;
      col = (int) PetscReal(values[3*i+1]);
      val = values[3*i+2];
      if (col >= baij->cstart*bs && col < baij->cend*bs) {
        col -= baij->cstart*bs;
        ierr = MatSetValues(baij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr);
      } else {
        if (mat->was_assembled) {
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat); CHKERRQ(ierr);
          }
          col = (baij->colmap[col/bs]) - 1 + col%bs;
          if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
            ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
            col  = (int) PetscReal(values[3*i+1]);
          }
        }
        ierr = MatSetValues(baij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr);
      }
    }
    count--;
  }
  if (baij->recv_waits) PetscFree(baij->recv_waits);
  if (baij->rvalues)    PetscFree(baij->rvalues);

  /* wait on sends */
  if (baij->nsends) {
    send_status = (MPI_Status *) PetscMalloc(baij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    ierr        = MPI_Waitall(baij->nsends,baij->send_waits,send_status);CHKERRQ(ierr);
    PetscFree(send_status);
  }
  if (baij->send_waits) PetscFree(baij->send_waits);
  if (baij->svalues)    PetscFree(baij->svalues);

  ierr = MatAssemblyBegin(baij->A,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode); CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
  if (mat->was_assembled && !other_disassembled) {
    ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat); CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(baij->B,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode); CHKERRQ(ierr);

#if defined(USE_PETSC_BOPT_g)
  if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    PLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",
             ((double)baij->ht_total_ct)/baij->ht_insert_ct);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact); CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  if (baij->rowvalues) { PetscFree(baij->rowvalues); baij->rowvalues = 0; }
  PetscFunctionReturn(0);
}
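/*
   Sketch of how the hash-table fast path gets enabled (user-side view,
   assuming the option is set before assembly):

       ierr = MatSetOption(mat,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
       ... MatSetValues(...) calls, MatAssemblyBegin/End(...) ...

   On the first MAT_FINAL_ASSEMBLY the table is built from the now-known
   nonzero pattern and the setvalues/setvaluesblocked function pointers are
   swapped to the _HT variants, so later insertions into existing locations
   skip the binary search in MatSetValues_SeqBAIJ_*_Private.
*/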
#undef __FUNC__
#define __FUNC__ "MatView_MPIBAIJ_Binary"
static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
  } else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworMatlab"
static int MatView_MPIBAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr,format,rank,bs = baij->bs;
  FILE        *fd;
  ViewerType  vtype;

  PetscFunctionBegin;
  ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
  if (vtype == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
    ierr = ViewerGetFormat(viewer,&format);
    if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
      MatInfo info;
      MPI_Comm_rank(mat->comm,&rank);
      ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);
      PetscSequentialPhaseBegin(mat->comm,1);
      fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
              rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
              baij->bs,(int)info.memory);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);
      fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);
      fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
      fflush(fd);
      PetscSequentialPhaseEnd(mat->comm,1);
      ierr = VecScatterView(baij->Mvctx,viewer); CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == VIEWER_FORMAT_ASCII_INFO) {
      PetscPrintf(mat->comm,"  block size is %d\n",bs);
      PetscFunctionReturn(0);
    }
  }

  if (vtype == DRAW_VIEWER) {
    Draw       draw;
    PetscTruth isnull;
    ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
    ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (vtype == ASCII_FILE_VIEWER) {
    ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
    PetscSequentialPhaseBegin(mat->comm,1);
    fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
            baij->rank,baij->m,baij->rstart*bs,baij->rend*bs,baij->n,
            baij->cstart*bs,baij->cend*bs);
    ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
    ierr = MatView(baij->B,viewer); CHKERRQ(ierr);
    fflush(fd);
    PetscSequentialPhaseEnd(mat->comm,1);
  } else {
    int size = baij->size;
    rank = baij->rank;
    if (size == 1) {
      ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
    } else {
      /* assemble the entire matrix onto first processor. */
      Mat         A;
      Mat_SeqBAIJ *Aloc;
      int         M = baij->M,N = baij->N,*ai,*aj,col,i,j,k,*rvals;
      int         mbs = baij->mbs;
      Scalar      *a;

      if (!rank) {
        ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
      } else {
        ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
      }
      PLogObjectParent(mat,A);

      /* copy over the A part */
      Aloc = (Mat_SeqBAIJ*) baij->A->data;
      ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
      rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);

      for ( i=0; i<mbs; i++ ) {
        rvals[0] = bs*(baij->rstart + i);
        for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
        for ( j=ai[i]; j<ai[i+1]; j++ ) {
          col = (baij->cstart+aj[j])*bs;
          for ( k=0; k<bs; k++ ) {
            ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
            col++; a += bs;
          }
        }
      }
      /* copy over the B part */
      Aloc = (Mat_SeqBAIJ*) baij->B->data;
      ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
      for ( i=0; i<mbs; i++ ) {
        rvals[0] = bs*(baij->rstart + i);
        for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
        for ( j=ai[i]; j<ai[i+1]; j++ ) {
          col = baij->garray[aj[j]]*bs;
          for ( k=0; k<bs; k++ ) {
            ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
            col++; a += bs;
          }
        }
      }
      PetscFree(rvals);
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      /*
         Everyone has to call to draw the matrix since the graphics waits are
         synchronized across all processors that share the Draw object
      */
      if (!rank || vtype == DRAW_VIEWER) {
        ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
      }
      ierr = MatDestroy(A); CHKERRQ(ierr);
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIBAIJ"
int MatView_MPIBAIJ(Mat mat,Viewer viewer)
{
  int        ierr;
  ViewerType vtype;

  PetscFunctionBegin;
  ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
  if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
      vtype == DRAW_VIEWER       || vtype == MATLAB_VIEWER) {
    ierr = MatView_MPIBAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
  } else if (vtype == BINARY_FILE_VIEWER) {
    ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ(1,1,"Viewer type not supported by PETSc object");
  }
  PetscFunctionReturn(0);
}
#undef __FUNC__
#define __FUNC__ "MatDestroy_MPIBAIJ"
int MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr;

  PetscFunctionBegin;
#if defined(USE_PETSC_LOG)
  PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",baij->M,baij->N);
#endif

  ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr);
  PetscFree(baij->rowners);
  ierr = MatDestroy(baij->A); CHKERRQ(ierr);
  ierr = MatDestroy(baij->B); CHKERRQ(ierr);
  if (baij->colmap)    PetscFree(baij->colmap);
  if (baij->garray)    PetscFree(baij->garray);
  if (baij->lvec)      VecDestroy(baij->lvec);
  if (baij->Mvctx)     VecScatterDestroy(baij->Mvctx);
  if (baij->rowvalues) PetscFree(baij->rowvalues);
  if (baij->barray)    PetscFree(baij->barray);
  if (baij->hd)        PetscFree(baij->hd);
  PetscFree(baij);
  PLogObjectDestroy(mat);
  PetscHeaderDestroy(mat);
  PetscFunctionReturn(0);
}
#undef __FUNC__
#define __FUNC__ "MatMult_MPIBAIJ"
int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
  int         ierr,nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != a->n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx");
  }
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != a->m) {
    SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and yy");
  }
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
  ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultAdd_MPIBAIJ"
int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
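/*
   Structure of the parallel multiply, written out as pseudo-steps (a sketch
   of what the calls above do): with x_ghost the off-process entries of xx
   gathered into a->lvec,

       y  = A_diag * x_local      (overlapped with the scatter of x_ghost)
       y += B_offdiag * x_ghost   (after VecScatterEnd completes)

   so the communication of ghost values hides behind the local product.
*/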
#undef __FUNC__
#define __FUNC__ "MatMultTrans_MPIBAIJ"
int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
  int         ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtrans)(a->A,xx,yy); CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation */
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultTransAdd_MPIBAIJ"
int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
  int         ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in zz until the next line, which is true for my implementation */
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNC__
#define __FUNC__ "MatGetDiagonal_MPIBAIJ"
int MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
  int         ierr;

  PetscFunctionBegin;
  if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatScale_MPIBAIJ"
int MatScale_MPIBAIJ(Scalar *aa,Mat A)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = MatScale(aa,a->A); CHKERRQ(ierr);
  ierr = MatScale(aa,a->B); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetSize_MPIBAIJ"
int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;

  PetscFunctionBegin;
  if (m) *m = mat->M;
  if (n) *n = mat->N;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetLocalSize_MPIBAIJ"
int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;

  PetscFunctionBegin;
  *m = mat->m; *n = mat->n;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetOwnershipRange_MPIBAIJ"
int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;

  PetscFunctionBegin;
  *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs;
  PetscFunctionReturn(0);
}

extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
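/*
   Ownership ranges are kept in block units and converted to point indices
   on the way out.  A small example (illustrative): with bs=4 and block rows
   [3,7) owned locally, MatGetOwnershipRange reports [12,28), i.e.
   rstart*bs = 12 and rend*bs = 28.
*/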
#undef __FUNC__
#define __FUNC__ "MatGetRow_MPIBAIJ"
int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
  Scalar      *vworkA,*vworkB,**pvA,**pvB,*v_p;
  int         bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB;
  int         nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
  int         *cmap,*idx_p,cstart = mat->cstart;

  PetscFunctionBegin;
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data;
    int         max = 1,mbs = mat->mbs,tmp;
    for ( i=0; i<mbs; i++ ) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar)));
    CHKPTRQ(mat->rowvalues);
    mat->rowindices = (int *) (mat->rowvalues + max*bs2);
  }

  if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,0,"Only local rows");
  lrow = row - brstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for ( i=0; i<nzB; i++ ) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
        for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for ( i=0; i<imark; i++ ) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for ( i=0; i<nzB; i++ ) {
            if (cmap[cworkB[i]/bs] < cstart)
              idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart*bs + cworkA[i];
        for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatRestoreRow_MPIBAIJ"
int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;

  PetscFunctionBegin;
  if (baij->getrowactive == PETSC_FALSE) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called");
  }
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetBlockSize_MPIBAIJ"
int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;

  PetscFunctionBegin;
  *bs = baij->bs;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatZeroEntries_MPIBAIJ"
int MatZeroEntries_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNC__
#define __FUNC__ "MatGetInfo_MPIBAIJ"
int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) matin->data;
  Mat         A = a->A,B = a->B;
  int         ierr;
  double      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = (double)a->bs;
  ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
  isend[0] = info->nz_used;  isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;   isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->rows_global       = (double)a->M;
  info->columns_global    = (double)a->N;
  info->rows_local        = (double)a->m;
  info->columns_local     = (double)a->n;
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatSetOption_MPIBAIJ"
int MatSetOption_MPIBAIJ(Mat A,MatOption op)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;

  PetscFunctionBegin;
  if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
      op == MAT_YES_NEW_NONZERO_LOCATIONS ||
      op == MAT_COLUMNS_UNSORTED ||
      op == MAT_COLUMNS_SORTED ||
      op == MAT_NEW_NONZERO_ALLOCATION_ERROR ||
      op == MAT_NEW_NONZERO_LOCATION_ERROR) {
    MatSetOption(a->A,op);
    MatSetOption(a->B,op);
  } else if (op == MAT_ROW_ORIENTED) {
    a->roworiented = 1;
    MatSetOption(a->A,op);
    MatSetOption(a->B,op);
  } else if (op == MAT_ROWS_SORTED ||
             op == MAT_ROWS_UNSORTED ||
             op == MAT_SYMMETRIC ||
             op == MAT_STRUCTURALLY_SYMMETRIC ||
             op == MAT_YES_NEW_DIAGONALS)
    PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
  else if (op == MAT_COLUMN_ORIENTED) {
    a->roworiented = 0;
    MatSetOption(a->A,op);
    MatSetOption(a->B,op);
  } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
    a->donotstash = 1;
  } else if (op == MAT_NO_NEW_DIAGONALS) {
    SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
  } else if (op == MAT_USE_HASH_TABLE) {
    a->ht_flag = 1;
  } else {
    SETERRQ(PETSC_ERR_SUP,0,"unknown option");
  }
  PetscFunctionReturn(0);
}
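/*
   Orientation, by example (illustrative): for MatSetValues with m=2 rows
   and n=3 columns, the 6 values in v are read as

       row oriented (default):   v = { a00,a01,a02, a10,a11,a12 }
       column oriented:          v = { a00,a10, a01,a11, a02,a12 }

   matching the v[i*n+j] versus v[i+j*m] indexing used throughout this file.
*/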
#undef __FUNC__
#define __FUNC__ "MatTranspose_MPIBAIJ"
int MatTranspose_MPIBAIJ(Mat A,Mat *matout)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data;
  Mat_SeqBAIJ *Aloc;
  Mat         B;
  int         ierr,M=baij->M,N=baij->N,*ai,*aj,i,*rvals,j,k,col;
  int         bs=baij->bs,mbs=baij->mbs;
  Scalar      *a;

  PetscFunctionBegin;
  if (matout == PETSC_NULL && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place");
  ierr = MatCreateMPIBAIJ(A->comm,baij->bs,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);
  CHKERRQ(ierr);

  /* copy over the A part */
  Aloc = (Mat_SeqBAIJ*) baij->A->data;
  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
  rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);

  for ( i=0; i<mbs; i++ ) {
    rvals[0] = bs*(baij->rstart + i);
    for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
    for ( j=ai[i]; j<ai[i+1]; j++ ) {
      col = (baij->cstart+aj[j])*bs;
      for ( k=0; k<bs; k++ ) {
        ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
        col++; a += bs;
      }
    }
  }
  /* copy over the B part */
  Aloc = (Mat_SeqBAIJ*) baij->B->data;
  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
  for ( i=0; i<mbs; i++ ) {
    rvals[0] = bs*(baij->rstart + i);
    for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
    for ( j=ai[i]; j<ai[i+1]; j++ ) {
      col = baij->garray[aj[j]]*bs;
      for ( k=0; k<bs; k++ ) {
        ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
        col++; a += bs;
      }
    }
  }
  PetscFree(rvals);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);

  if (matout != PETSC_NULL) {
    *matout = B;
  } else {
    PetscOps       *Abops;
    struct _MatOps *Aops;

    /* This isn't really an in-place transpose .... but free data structures from baij */
    PetscFree(baij->rowners);
    ierr = MatDestroy(baij->A); CHKERRQ(ierr);
    ierr = MatDestroy(baij->B); CHKERRQ(ierr);
    if (baij->colmap) PetscFree(baij->colmap);
    if (baij->garray) PetscFree(baij->garray);
    if (baij->lvec)   VecDestroy(baij->lvec);
    if (baij->Mvctx)  VecScatterDestroy(baij->Mvctx);
    PetscFree(baij);

    /*
       This is horrible, horrible code. We need to keep the
       A pointers for the bops and ops but copy everything
       else from B.
    */
    Abops   = A->bops;
    Aops    = A->ops;
    PetscMemcpy(A,B,sizeof(struct _p_Mat));
    A->bops = Abops;
    A->ops  = Aops;

    PetscHeaderDestroy(B);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatDiagonalScale_MPIBAIJ"
int MatDiagonalScale_MPIBAIJ(Mat A,Vec ll,Vec rr)
{
  Mat a = ((Mat_MPIBAIJ *) A->data)->A;
  Mat b = ((Mat_MPIBAIJ *) A->data)->B;
  int ierr,s1,s2,s3;

  PetscFunctionBegin;
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1); CHKERRQ(ierr);
    ierr = MatGetLocalSize(A,&s2,&s3); CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"non-conforming local sizes");
    ierr = MatDiagonalScale(a,ll,0); CHKERRQ(ierr);
    ierr = MatDiagonalScale(b,ll,0); CHKERRQ(ierr);
  }
  if (rr) SETERRQ(PETSC_ERR_SUP,0,"not supported for right vector");
  PetscFunctionReturn(0);
}
#undef __FUNC__
#define __FUNC__ "MatZeroRows_MPIBAIJ"
int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag)
{
  Mat_MPIBAIJ  *l = (Mat_MPIBAIJ *) A->data;
  int          i,ierr,N,*rows,*owners = l->rowners,size = l->size;
  int          *procs,*nprocs,j,found,idx,nsends,*work,row;
  int          nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int          *rvalues,tag = A->tag,count,base,slen,n,*source;
  int          *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs;
  MPI_Comm     comm = A->comm;
  MPI_Request  *send_waits,*recv_waits;
  MPI_Status   recv_status,*send_status;
  IS           istmp;
  PetscTruth   localdiag;

  PetscFunctionBegin;
  ierr = ISGetSize(is,&N); CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner  = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note */
  for ( i=0; i<N; i++ ) {
    idx   = rows[i];
    found = 0;
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
  }
  nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i]; }

  /* inform other processors of number of messages and max length */
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  ierr = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
  nrecvs = work[rank];
  ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
  nmax = work[rank];
  PetscFree(work);

  /* post receives */
  rvalues    = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
       1) starts[i] gives the starting index in svalues for stuff going to
          the ith processor
  */
  svalues    = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
  starts     = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
  starts[0]  = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1]; }
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ISRestoreIndices(is,&rows);

  starts[0] = 0;
  for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1]; }
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  PetscFree(starts);

  base = owners[rank]*bs;

  /* wait on receives */
  lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  PetscFree(recv_waits);

  /* move the data into the send scatter */
  lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;
    }
  }
  PetscFree(rvalues); PetscFree(lens);
  PetscFree(owner);   PetscFree(nprocs);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PLogObjectParent(A,istmp);

  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal, we use separate
     code so that MatSetValues() is not called for each diagonal entry,
     which would allocate new memory with each call and thus trigger
     lots of mallocs, slowing things down.

     Contributed by: Mathew Knepley
  */
  localdiag = PETSC_FALSE;
  if (diag && (l->A->M == l->A->N)) {
    localdiag = PETSC_TRUE;
    ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr);
  }
  ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
  ierr = ISDestroy(istmp); CHKERRQ(ierr);

  if (diag && (localdiag == PETSC_FALSE)) {
    for ( i=0; i<slen; i++ ) {
      row  = lrows[i] + rstart_bs;
      ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES); CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  }
  PetscFree(lrows);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    PetscFree(send_status);
  }
  PetscFree(send_waits); PetscFree(svalues);

  PetscFunctionReturn(0);
}
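/*
   Example (a minimal usage sketch): rows are given by global point (not
   block) indices through an IS; pass a pointer to a Scalar to place that
   value on the diagonal of each zeroed row, or PETSC_NULL to leave the
   diagonal zero as well. The row numbers used here are hypothetical.
*/
#if 0
   IS     is;
   int    rows[2] = {0, 5};   /* global row numbers to zero */
   Scalar diag    = 1.0;
   ierr = ISCreateGeneral(PETSC_COMM_SELF,2,rows,&is); CHKERRQ(ierr);
   ierr = MatZeroRows(A,is,&diag); CHKERRQ(ierr);
   ierr = ISDestroy(is); CHKERRQ(ierr);
#endif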
extern int MatPrintHelp_SeqBAIJ(Mat);
#undef __FUNC__
#define __FUNC__ "MatPrintHelp_MPIBAIJ"
int MatPrintHelp_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data;
  MPI_Comm    comm = A->comm;
  static int  called = 0;
  int         ierr;

  PetscFunctionBegin;
  if (!a->rank) {
    ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr);
  }
  if (called) {PetscFunctionReturn(0);} else called = 1;
  (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");
  (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatSetUnfactored_MPIBAIJ"
int MatSetUnfactored_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static int MatConvertSameType_MPIBAIJ(Mat,Mat *,int);

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps = {
  MatSetValues_MPIBAIJ,MatGetRow_MPIBAIJ,MatRestoreRow_MPIBAIJ,MatMult_MPIBAIJ,
  MatMultAdd_MPIBAIJ,MatMultTrans_MPIBAIJ,MatMultTransAdd_MPIBAIJ,0,
  0,0,0,0,
  0,0,MatTranspose_MPIBAIJ,MatGetInfo_MPIBAIJ,
  0,MatGetDiagonal_MPIBAIJ,MatDiagonalScale_MPIBAIJ,MatNorm_MPIBAIJ,
  MatAssemblyBegin_MPIBAIJ,MatAssemblyEnd_MPIBAIJ,0,MatSetOption_MPIBAIJ,
  MatZeroEntries_MPIBAIJ,MatZeroRows_MPIBAIJ,0,
  0,0,0,MatGetSize_MPIBAIJ,
  MatGetLocalSize_MPIBAIJ,MatGetOwnershipRange_MPIBAIJ,0,0,
  0,0,MatConvertSameType_MPIBAIJ,0,0,
  0,0,0,MatGetSubMatrices_MPIBAIJ,
  MatIncreaseOverlap_MPIBAIJ,MatGetValues_MPIBAIJ,0,MatPrintHelp_MPIBAIJ,
  MatScale_MPIBAIJ,0,0,0,MatGetBlockSize_MPIBAIJ,
  0,0,0,0,0,0,MatSetUnfactored_MPIBAIJ,0,MatSetValuesBlocked_MPIBAIJ};
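/*
   The table above is the dispatch mechanism for this matrix type: the
   type-independent interface routines simply indirect through the ops
   table of the Mat they are handed. In essence (a sketch of the generic
   layer, not code from this file):
*/
#if 0
   ierr = (*A->ops->mult)(A,x,y); CHKERRQ(ierr);   /* what MatMult(A,x,y) reduces to;
                                                      resolves to MatMult_MPIBAIJ here */
#endif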
#undef __FUNC__
#define __FUNC__ "MatCreateMPIBAIJ"
/*@C
   MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
   (block compressed row).  For good matrix assembly performance the user
   should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs   - size of block
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
       This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
.  d_nz  - number of block nonzeros per block row in the diagonal portion of the local
           submatrix (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           of the diagonal portion of the local submatrix (possibly different for
           each block row) or PETSC_NULL.  You must leave room for the diagonal
           entry even if it is zero.
.  o_nz  - number of block nonzeros per block row in the off-diagonal portion of the local
           submatrix (same for all local rows).
-  o_nnz - array containing the number of block nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or PETSC_NULL.

   Output Parameter:
.  A - the matrix

   Options Database Keys:
.  -mat_no_unroll - uses code that does not unroll the loops in the
                    block calculations (much slower)
.  -mat_block_size - size of the blocks to use

   Notes:
   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          -------------------
   row 3  | o o o d d d o o o o o o
   row 4  | o o o d d d o o o o o o
   row 5  | o o o d d d o o o o o o
          -------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
   stored simply in the MATSEQBAIJ format for compressed row storage.

   Now d_nz should indicate the number of block nonzeros per row in the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.  For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIAIJ()
@*/
int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,
                     int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
{
  Mat         B;
  Mat_MPIBAIJ *b;
  int         ierr,i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size,flg;

  PetscFunctionBegin;
  if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid block size specified, must be positive");

  MPI_Comm_size(comm,&size);
  if (size == 1) {
    if (M == PETSC_DECIDE) M = m;
    if (N == PETSC_DECIDE) N = n;
    ierr = MatCreateSeqBAIJ(comm,bs,M,N,d_nz,d_nnz,A); CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  *A = 0;
  PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,comm,MatDestroy,MatView);
  PLogObjectCreate(B);
  B->data = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b);
  PetscMemzero(b,sizeof(Mat_MPIBAIJ));
  PetscMemcpy(B->ops,&MatOps,sizeof(struct _MatOps));

  B->ops->destroy = MatDestroy_MPIBAIJ;
  B->ops->view    = MatView_MPIBAIJ;
  B->mapping      = 0;
  B->factor       = 0;
  B->assembled    = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  MPI_Comm_rank(comm,&b->rank);
  MPI_Comm_size(comm,&b->size);

  if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
  }
  if ( M == PETSC_DECIDE && m == PETSC_DECIDE) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either M or m should be specified");
  }
  if ( N == PETSC_DECIDE && n == PETSC_DECIDE) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either N or n should be specified");
  }
  /* when both local and global sizes are given, the local sizes take
     precedence and the global sizes are recomputed from them below */
  if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE;
  if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE;

  if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
    work[0] = m; work[1] = n;
    mbs = m/bs; nbs = n/bs;
    ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr);
    if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;}
    if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;}
  }
  if (m == PETSC_DECIDE) {
    Mbs = M/bs;
    if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Number of global rows must be divisible by blocksize");
    mbs = Mbs/b->size + ((Mbs % b->size) > b->rank);
    m   = mbs*bs;
  }
  if (n == PETSC_DECIDE) {
    Nbs = N/bs;
    if (Nbs*bs != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Number of global cols must be divisible by blocksize");
    nbs = Nbs/b->size + ((Nbs % b->size) > b->rank);
    n   = nbs*bs;
  }
  if (mbs*bs != m || nbs*bs != n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,0,"Number of local rows, cols must be divisible by blocksize");
  }

  b->m = m; B->m = m;
  b->n = n; B->n = n;
  b->N = N; B->N = N;
  b->M = M; B->M = M;
  b->bs  = bs;
  b->bs2 = bs*bs;
  b->mbs = mbs;
  b->nbs = nbs;
  b->Mbs = Mbs;
  b->Nbs = Nbs;

  /* build local table of row and column ownerships */
  b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
  PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
  b->cowners = b->rowners + b->size + 2;
  ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  b->rowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->rowners[i] += b->rowners[i-1];
  }
  b->rstart    = b->rowners[b->rank];
  b->rend      = b->rowners[b->rank+1];
  b->rstart_bs = b->rstart * bs;
  b->rend_bs   = b->rend * bs;

  ierr = MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  b->cowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->cowners[i] += b->cowners[i-1];
  }
  b->cstart    = b->cowners[b->rank];
  b->cend      = b->cowners[b->rank+1];
  b->cstart_bs = b->cstart * bs;
  b->cend_bs   = b->cend * bs;

  if (d_nz == PETSC_DEFAULT) d_nz = 5;
  ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
  PLogObjectParent(B,b->A);
  if (o_nz == PETSC_DEFAULT) o_nz = 0;
  ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
  PLogObjectParent(B,b->B);

  /* build cache for off-processor entries formed during assembly */
  ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
  b->donotstash  = 0;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = 1;

  /* stuff used in block assembly */
  b->barray = 0;

  /* stuff used for matrix-vector multiply */
  b->lvec  = 0;
  b->Mvctx = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = 0;
  b->hd           = 0;
  b->ht_size      = 0;
  b->ht_flag      = 0;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  *A = B;
  ierr = OptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg); CHKERRQ(ierr);
  if (flg) {
    double fact = 1.39;
    ierr = MatSetOption(B,MAT_USE_HASH_TABLE); CHKERRQ(ierr);
    ierr = OptionsGetDouble(PETSC_NULL,"-mat_use_hash_table",&fact,&flg); CHKERRQ(ierr);
    if (fact <= 1.0) fact = 1.39;
    ierr = MatMPIBAIJSetHashTableFactor(B,fact); CHKERRQ(ierr);
    PLogInfo(0,"MatCreateMPIBAIJ:Hash table factor used %5.2f\n",fact);
  }
  PetscFunctionReturn(0);
}
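/*
   Example (a minimal usage sketch following the calling sequence documented
   above): a 100x100 matrix with block size 2 and constant per-block-row
   preallocation of 5 diagonal-portion and 2 off-diagonal-portion block
   nonzeros; the sizes are illustrative.
*/
#if 0
   Mat A;
   ierr = MatCreateMPIBAIJ(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DECIDE,100,100,
                           5,PETSC_NULL,2,PETSC_NULL,&A); CHKERRQ(ierr);
   /* ... insert entries with MatSetValues()/MatSetValuesBlocked() ... */
   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
#endif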
#undef __FUNC__
#define __FUNC__ "MatConvertSameType_MPIBAIJ"
static int MatConvertSameType_MPIBAIJ(Mat matin,Mat *newmat,int cpvalues)
{
  Mat         mat;
  Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data;
  int         ierr,len=0,flg;

  PetscFunctionBegin;
  *newmat = 0;
  PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,matin->comm,MatDestroy,MatView);
  PLogObjectCreate(mat);
  mat->data = (void *) (a = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(a);
  PetscMemcpy(mat->ops,&MatOps,sizeof(struct _MatOps));
  mat->ops->destroy = MatDestroy_MPIBAIJ;
  mat->ops->view    = MatView_MPIBAIJ;
  mat->factor       = matin->factor;
  mat->assembled    = PETSC_TRUE;

  a->m = mat->m = oldmat->m;
  a->n = mat->n = oldmat->n;
  a->M = mat->M = oldmat->M;
  a->N = mat->N = oldmat->N;

  a->bs  = oldmat->bs;
  a->bs2 = oldmat->bs2;
  a->mbs = oldmat->mbs;
  a->nbs = oldmat->nbs;
  a->Mbs = oldmat->Mbs;
  a->Nbs = oldmat->Nbs;

  a->rstart = oldmat->rstart;
  a->rend   = oldmat->rend;
  a->cstart = oldmat->cstart;
  a->cend   = oldmat->cend;
  a->size   = oldmat->size;
  a->rank   = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;
  a->barray       = 0;

  /* hash table stuff */
  a->ht           = 0;
  a->hd           = 0;
  a->ht_size      = 0;
  a->ht_flag      = oldmat->ht_flag;
  a->ht_fact      = oldmat->ht_fact;
  a->ht_total_ct  = 0;
  a->ht_insert_ct = 0;

  a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
  a->cowners = a->rowners + a->size + 2;
  PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
  ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
  if (oldmat->colmap) {
    a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap);
    PLogObjectMemory(mat,(a->Nbs)*sizeof(int));
    PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));
  } else a->colmap = 0;
  if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) {
    a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray);
    PLogObjectMemory(mat,len*sizeof(int));
    PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
  } else a->garray = 0;

  ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
  PLogObjectParent(mat,a->lvec);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
  PLogObjectParent(mat,a->Mvctx);
  ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
  PLogObjectParent(mat,a->B);
  ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
  if (flg) {
    ierr = MatPrintHelp(mat); CHKERRQ(ierr);
  }
  *newmat = mat;
  PetscFunctionReturn(0);
}
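/*
   The routine above is reached through the ops table when the source and
   destination formats coincide; from user code the same path is a plain
   same-type conversion, i.e. a deep copy (a sketch):
*/
#if 0
   Mat Acopy;
   ierr = MatConvert(A,MATSAME,&Acopy); CHKERRQ(ierr);  /* deep copy with the same parallel layout */
   ierr = MatDestroy(Acopy); CHKERRQ(ierr);
#endif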
#include "sys.h"

#undef __FUNC__
#define __FUNC__ "MatLoad_MPIBAIJ"
int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat)
{
  Mat          A;
  int          i,nz,ierr,j,rstart,rend,fd;
  Scalar       *vals,*buf;
  MPI_Comm     comm = ((PetscObject)viewer)->comm;
  MPI_Status   status;
  int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
  int          *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf;
  int          flg,tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows;
  int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
  int          dcount,kmax,k,nzcount,tmp;

  PetscFunctionBegin;
  ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);

  MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
  if (!rank) {
    ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
    if (header[3] < 0) {
      SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as MPIBAIJ");
    }
  }

  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];

  if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices");

  /*
     This code adds extra rows to make sure the number of rows is
     divisible by the blocksize
  */
  Mbs        = M/bs;
  extra_rows = bs - M + bs*Mbs;
  if (extra_rows == bs) extra_rows = 0;
  else                  Mbs++;
  if (extra_rows && !rank) {
    PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n");
  }

  /* determine ownership of all rows */
  mbs      = Mbs/size + ((Mbs % size) > rank);
  m        = mbs * bs;
  rowners  = (int *) PetscMalloc(2*(size+2)*sizeof(int)); CHKPTRQ(rowners);
  browners = rowners + size + 1;
  ierr     = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1];
  for ( i=0; i<=size; i++ ) browners[i] = rowners[i]*bs;
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) ); CHKPTRQ(locrowlens);
  if (!rank) {
    rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) ); CHKPTRQ(rowlengths);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr);
    for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
    sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
    PetscFree(sndcounts);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
    PetscMemzero(procsnz,size*sizeof(int));
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]*bs; j<rowners[i+1]*bs; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    PetscFree(rowlengths);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);

    /* read in my part of the matrix column indices */
    nz     = procsnz[0];
    ibuf   = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
    mycols = ibuf;
    if (size == 1) nz -= extra_rows;
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr);
    if (size == 1) for ( i=0; i<extra_rows; i++ ) { mycols[nz+i] = M+i; }

    /* read in each other processor's column indices (except the last) and ship them off */
    for ( i=1; i<size-1; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    /* read in the part for the last processor */
    if ( size != 1 ) {
      nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr);
      for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i;
      ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
    }
    PetscFree(cols);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += locrowlens[i];
    }
    ibuf   = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
    mycols = ibuf;
    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
  }
  /* loop over local rows, determining the number of diagonal and off-diagonal block entries */
  dlens  = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) ); CHKPTRQ(dlens);
  odlens = dlens + (rend-rstart);
  mask   = (int *) PetscMalloc( 3*Mbs*sizeof(int) ); CHKPTRQ(mask);
  PetscMemzero(mask,3*Mbs*sizeof(int));
  masked1 = mask    + Mbs;
  masked2 = masked1 + Mbs;
  rowcount = 0; nzcount = 0;
  for ( i=0; i<mbs; i++ ) {
    dcount  = 0;
    odcount = 0;
    for ( j=0; j<bs; j++ ) {
      kmax = locrowlens[rowcount];
      for ( k=0; k<kmax; k++ ) {
        tmp = mycols[nzcount++]/bs;
        if (!mask[tmp]) {
          mask[tmp] = 1;
          if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
          else masked1[dcount++] = tmp;
        }
      }
      rowcount++;
    }

    dlens[i]  = dcount;
    odlens[i] = odcount;

    /* zero out the mask elements we set */
    for ( j=0; j<dcount; j++ )  mask[masked1[j]] = 0;
    for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0;
  }

  /* create our matrix */
  ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);
  CHKERRQ(ierr);
  A = *newmat;
  MatSetOption(A,MAT_COLUMNS_SORTED);

  if (!rank) {
    buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(buf);
    /* read in my part of the matrix numerical values */
    nz     = procsnz[0];
    vals   = buf;
    mycols = ibuf;
    if (size == 1) nz -= extra_rows;
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
    if (size == 1) for ( i=0; i<extra_rows; i++ ) { vals[nz+i] = 1.0; }

    /* insert into matrix */
    jj = rstart*bs;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
    /* read in the values for the other processors (except the last one) and ship them out */
    for ( i=1; i<size-1; i++ ) {
      nz   = procsnz[i];
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    /* the last processor */
    if ( size != 1 ) {
      nz   = procsnz[size-1] - extra_rows;
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
      for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0;
      ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
    }
    PetscFree(procsnz);
  } else {
    /* receive numeric values */
    buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(buf);

    /* receive message of values */
    vals   = buf;
    mycols = ibuf;
    ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");

    /* insert into matrix */
    jj = rstart*bs;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
  }
  PetscFree(locrowlens);
  PetscFree(buf);
  PetscFree(ibuf);
  PetscFree(rowners);
  PetscFree(dlens);
  PetscFree(mask);
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
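/*
   Example (a usage sketch, assuming the binary-viewer interface of this
   PETSc generation): load a matrix previously written with MatView() on a
   binary viewer; the block size is taken from -matload_block_size
   (default 1). The file name is hypothetical.
*/
#if 0
   Viewer viewer;
   Mat    A;
   ierr = ViewerFileOpenBinary(PETSC_COMM_WORLD,"matrix.dat",BINARY_RDONLY,&viewer); CHKERRQ(ierr);
   ierr = MatLoad(viewer,MATMPIBAIJ,&A); CHKERRQ(ierr);
   ierr = ViewerDestroy(viewer); CHKERRQ(ierr);
#endif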
#undef __FUNC__
#define __FUNC__ "MatMPIBAIJSetHashTableFactor"
/*@
   MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.

   Collective on Mat

   Input Parameters:
+  mat  - the matrix
-  fact - the hash table size factor; should be greater than 1.0

   Notes:
   This can also be set by the command line option: -mat_use_hash_table fact

.keywords: matrix, hashtable, factor, HT

.seealso: MatSetOption()
@*/
int MatMPIBAIJSetHashTableFactor(Mat mat,double fact)
{
  Mat_MPIBAIJ *baij;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_COOKIE);
  if (mat->type != MATMPIBAIJ) {
    SETERRQ(PETSC_ERR_ARG_WRONG,1,"Incorrect matrix type. Use MPIBAIJ only.");
  }
  baij = (Mat_MPIBAIJ*) mat->data;
  baij->ht_fact = fact;
  PetscFunctionReturn(0);
}
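/*
   Example (a minimal usage sketch, grounded in the option handling in
   MatCreateMPIBAIJ() above): the factor only has an effect once hash-table
   assembly has been selected for the matrix.
*/
#if 0
   ierr = MatSetOption(A,MAT_USE_HASH_TABLE); CHKERRQ(ierr);
   ierr = MatMPIBAIJSetHashTableFactor(A,1.6); CHKERRQ(ierr);  /* factor should exceed 1.0; the
                                                                  command line path defaults to 1.39 */
#endif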