1 #ifdef PETSC_RCS_HEADER 2 static char vcid[] = "$Id: mpibaij.c,v 1.125 1998/05/29 18:52:46 bsmith Exp balay $"; 3 #endif 4 5 #include "pinclude/pviewer.h" /*I "mat.h" I*/ 6 #include "src/mat/impls/baij/mpi/mpibaij.h" 7 #include "src/vec/vecimpl.h" 8 9 10 extern int MatSetUpMultiply_MPIBAIJ(Mat); 11 extern int DisAssemble_MPIBAIJ(Mat); 12 extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int); 13 extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatGetSubMatrixCall,Mat **); 14 15 /* 16 Local utility routine that creates a mapping from the global column 17 number to the local number in the off-diagonal part of the local 18 storage of the matrix. This is done in a non scable way since the 19 length of colmap equals the global matrix length. 20 */ 21 #undef __FUNC__ 22 #define __FUNC__ "CreateColmap_MPIBAIJ_Private" 23 static int CreateColmap_MPIBAIJ_Private(Mat mat) 24 { 25 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 26 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data; 27 int nbs = B->nbs,i,bs=B->bs;; 28 29 PetscFunctionBegin; 30 baij->colmap = (int *) PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap); 31 PLogObjectMemory(mat,baij->Nbs*sizeof(int)); 32 PetscMemzero(baij->colmap,baij->Nbs*sizeof(int)); 33 for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1; 34 PetscFunctionReturn(0); 35 } 36 37 #define CHUNKSIZE 10 38 39 #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 40 { \ 41 \ 42 brow = row/bs; \ 43 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 44 rmax = aimax[brow]; nrow = ailen[brow]; \ 45 bcol = col/bs; \ 46 ridx = row % bs; cidx = col % bs; \ 47 low = 0; high = nrow; \ 48 while (high-low > 3) { \ 49 t = (low+high)/2; \ 50 if (rp[t] > bcol) high = t; \ 51 else low = t; \ 52 } \ 53 for ( _i=low; _i<high; _i++ ) { \ 54 if (rp[_i] > bcol) break; \ 55 if (rp[_i] == bcol) { \ 56 bap = ap + bs2*_i + bs*cidx + ridx; \ 57 if (addv == ADD_VALUES) *bap += value; \ 58 else *bap = value; \ 59 goto a_noinsert; \ 60 } \ 61 } \ 62 if (a->nonew == 1) goto a_noinsert; \ 63 else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 64 if (nrow >= rmax) { \ 65 /* there is no extra room in row, therefore enlarge */ \ 66 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 67 Scalar *new_a; \ 68 \ 69 if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 70 \ 71 /* malloc new storage space */ \ 72 len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); \ 73 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 74 new_j = (int *) (new_a + bs2*new_nz); \ 75 new_i = new_j + new_nz; \ 76 \ 77 /* copy over old data into new slots */ \ 78 for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} \ 79 for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 80 PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); \ 81 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \ 82 PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, \ 83 len*sizeof(int)); \ 84 PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); \ 85 PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \ 86 PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \ 87 aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar)); \ 88 /* free up old matrix storage */ \ 89 PetscFree(a->a); \ 90 if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \ 91 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 92 a->singlemalloc = 1; \ 93 \ 94 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 95 rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \ 96 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \ 97 a->maxnz += bs2*CHUNKSIZE; \ 98 a->reallocs++; \ 99 a->nz++; \ 100 } \ 101 N = nrow++ - 1; \ 102 /* shift up all the later entries in this row */ \ 103 for ( ii=N; ii>=_i; ii-- ) { \ 104 rp[ii+1] = rp[ii]; \ 105 PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \ 106 } \ 107 if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar)); \ 108 rp[_i] = bcol; \ 109 ap[bs2*_i + bs*cidx + ridx] = value; \ 110 a_noinsert:; \ 111 ailen[brow] = nrow; \ 112 } 113 114 #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 115 { \ 116 \ 117 brow = row/bs; \ 118 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 119 rmax = bimax[brow]; nrow = bilen[brow]; \ 120 bcol = col/bs; \ 121 ridx = row % bs; cidx = col % bs; \ 122 low = 0; high = nrow; \ 123 while (high-low > 3) { \ 124 t = (low+high)/2; \ 125 if (rp[t] > bcol) high = t; \ 126 else low = t; \ 127 } \ 128 for ( _i=low; _i<high; _i++ ) { \ 129 if (rp[_i] > bcol) break; \ 130 if (rp[_i] == bcol) { \ 131 bap = ap + bs2*_i + bs*cidx + ridx; \ 132 if (addv == ADD_VALUES) *bap += value; \ 133 else *bap = value; \ 134 goto b_noinsert; \ 135 } \ 136 } \ 137 if (b->nonew == 1) goto b_noinsert; \ 138 else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 139 if (nrow >= rmax) { \ 140 /* there is no extra room in row, therefore enlarge */ \ 141 int new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 142 Scalar *new_a; \ 143 \ 144 if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 145 \ 146 /* malloc new storage space */ \ 147 len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(b->mbs+1)*sizeof(int); \ 148 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 149 new_j = (int *) (new_a + bs2*new_nz); \ 150 new_i = new_j + new_nz; \ 151 \ 152 /* copy over old data into new slots */ \ 153 for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = bi[ii];} \ 154 for ( ii=brow+1; ii<b->mbs+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 155 PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int)); \ 156 len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \ 157 PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow, \ 158 len*sizeof(int)); \ 159 PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(Scalar)); \ 160 PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \ 161 PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \ 162 ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(Scalar)); \ 163 /* free up old matrix storage */ \ 164 PetscFree(b->a); \ 165 if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \ 166 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 167 b->singlemalloc = 1; \ 168 \ 169 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 170 rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \ 171 PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \ 172 b->maxnz += bs2*CHUNKSIZE; \ 173 b->reallocs++; \ 174 b->nz++; \ 175 } \ 176 N = nrow++ - 1; \ 177 /* shift up all the later entries in this row */ \ 178 for ( ii=N; ii>=_i; ii-- ) { \ 179 rp[ii+1] = rp[ii]; \ 180 PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \ 181 } \ 182 if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar)); \ 183 rp[_i] = bcol; \ 184 ap[bs2*_i + bs*cidx + ridx] = value; \ 185 b_noinsert:; \ 186 bilen[brow] = nrow; \ 187 } 188 189 #undef __FUNC__ 190 #define __FUNC__ "MatSetValues_MPIBAIJ" 191 int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 192 { 193 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 194 Scalar value; 195 int ierr,i,j,row,col; 196 int roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ; 197 int rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs; 198 int cend_orig=baij->cend_bs,bs=baij->bs; 199 200 /* Some Variables required in the macro */ 201 Mat A = baij->A; 202 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) (A)->data; 203 int *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 204 Scalar *aa=a->a; 205 206 Mat B = baij->B; 207 Mat_SeqBAIJ *b = (Mat_SeqBAIJ *) (B)->data; 208 int *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 209 Scalar *ba=b->a; 210 211 int *rp,ii,nrow,_i,rmax,N,brow,bcol; 212 int low,high,t,ridx,cidx,bs2=a->bs2; 213 Scalar *ap,*bap; 214 215 PetscFunctionBegin; 216 for ( i=0; i<m; i++ ) { 217 #if defined(USE_PETSC_BOPT_g) 218 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 219 if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 220 #endif 221 if (im[i] >= rstart_orig && im[i] < rend_orig) { 222 row = im[i] - rstart_orig; 223 for ( j=0; j<n; j++ ) { 224 if (in[j] >= cstart_orig && in[j] < cend_orig){ 225 col = in[j] - cstart_orig; 226 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 227 MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 228 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 229 } 230 #if defined(USE_PETSC_BOPT_g) 231 else if (in[j] < 0) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");} 232 else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Col too large");} 233 #endif 234 else { 235 if (mat->was_assembled) { 236 if (!baij->colmap) { 237 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 238 } 239 col = baij->colmap[in[j]/bs] - 1 + in[j]%bs; 240 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 241 ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 242 col = in[j]; 243 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 244 B = baij->B; 245 b = (Mat_SeqBAIJ *) (B)->data; 246 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 247 ba=b->a; 248 } 249 } else col = in[j]; 250 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 251 MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 252 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 253 } 254 } 255 } else { 256 if (roworiented && !baij->donotstash) { 257 ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 258 } else { 259 if (!baij->donotstash) { 260 row = im[i]; 261 for ( j=0; j<n; j++ ) { 262 ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 263 } 264 } 265 } 266 } 267 } 268 PetscFunctionReturn(0); 269 } 270 271 extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 272 #undef __FUNC__ 273 #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ" 274 int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 275 { 276 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 277 Scalar *value,*barray=baij->barray; 278 int ierr,i,j,ii,jj,row,col,k,l; 279 int roworiented = baij->roworiented,rstart=baij->rstart ; 280 int rend=baij->rend,cstart=baij->cstart,stepval; 281 int cend=baij->cend,bs=baij->bs,bs2=baij->bs2; 282 283 if(!barray) { 284 baij->barray = barray = (Scalar*) PetscMalloc(bs2*sizeof(Scalar)); CHKPTRQ(barray); 285 } 286 287 if (roworiented) { 288 stepval = (n-1)*bs; 289 } else { 290 stepval = (m-1)*bs; 291 } 292 for ( i=0; i<m; i++ ) { 293 #if defined(USE_PETSC_BOPT_g) 294 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 295 if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 296 #endif 297 if (im[i] >= rstart && im[i] < rend) { 298 row = im[i] - rstart; 299 for ( j=0; j<n; j++ ) { 300 /* If NumCol = 1 then a copy is not required */ 301 if ((roworiented) && (n == 1)) { 302 barray = v + i*bs2; 303 } else if((!roworiented) && (m == 1)) { 304 barray = v + j*bs2; 305 } else { /* Here a copy is required */ 306 if (roworiented) { 307 value = v + i*(stepval+bs)*bs + j*bs; 308 } else { 309 value = v + j*(stepval+bs)*bs + i*bs; 310 } 311 for ( ii=0; ii<bs; ii++,value+=stepval ) { 312 for (jj=0; jj<bs; jj++ ) { 313 *barray++ = *value++; 314 } 315 } 316 barray -=bs2; 317 } 318 319 if (in[j] >= cstart && in[j] < cend){ 320 col = in[j] - cstart; 321 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 322 } 323 #if defined(USE_PETSC_BOPT_g) 324 else if (in[j] < 0) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");} 325 else if (in[j] >= baij->Nbs) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");} 326 #endif 327 else { 328 if (mat->was_assembled) { 329 if (!baij->colmap) { 330 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 331 } 332 333 #if defined(USE_PETSC_BOPT_g) 334 if ((baij->colmap[in[j]] - 1) % bs) {SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");} 335 #endif 336 col = (baij->colmap[in[j]] - 1)/bs; 337 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 338 ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 339 col = in[j]; 340 } 341 } 342 else col = in[j]; 343 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 344 } 345 } 346 } else { 347 if (!baij->donotstash) { 348 if (roworiented ) { 349 row = im[i]*bs; 350 value = v + i*(stepval+bs)*bs; 351 for ( j=0; j<bs; j++,row++ ) { 352 for ( k=0; k<n; k++ ) { 353 for ( col=in[k]*bs,l=0; l<bs; l++,col++) { 354 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr); 355 } 356 } 357 } 358 } else { 359 for ( j=0; j<n; j++ ) { 360 value = v + j*(stepval+bs)*bs + i*bs; 361 col = in[j]*bs; 362 for ( k=0; k<bs; k++,col++,value+=stepval) { 363 for ( row = im[i]*bs,l=0; l<bs; l++,row++) { 364 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr); 365 } 366 } 367 } 368 } 369 } 370 } 371 } 372 PetscFunctionReturn(0); 373 } 374 #include <math.h> 375 #define HASH_KEY 0.6180339887 376 /* #define HASH1(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 377 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp))) 378 /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 379 #undef __FUNC__ 380 #define __FUNC__ "MatSetValues_MPIBAIJ_HT" 381 int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 382 { 383 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 384 int ierr,i,j,row,col; 385 int roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ; 386 int rend_orig=baij->rend_bs,Nbs=baij->Nbs; 387 int h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx; 388 double tmp; 389 Scalar ** HD = baij->hd,value; 390 #if defined(USE_PETSC_BOPT_g) 391 int total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 392 #endif 393 394 PetscFunctionBegin; 395 396 for ( i=0; i<m; i++ ) { 397 #if defined(USE_PETSC_BOPT_g) 398 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 399 if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 400 #endif 401 row = im[i]; 402 if (row >= rstart_orig && row < rend_orig) { 403 for ( j=0; j<n; j++ ) { 404 col = in[j]; 405 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 406 /* Look up into the Hash Table */ 407 key = (row/bs)*Nbs+(col/bs)+1; 408 h1 = HASH(size,key,tmp); 409 410 411 idx = h1; 412 #if defined(USE_PETSC_BOPT_g) 413 insert_ct++; 414 total_ct++; 415 if (HT[idx] != key) { 416 for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 417 if (idx == size) { 418 for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 419 if (idx == h1) { 420 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 421 } 422 } 423 } 424 #else 425 if (HT[idx] != key) { 426 for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++); 427 if (idx == size) { 428 for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++); 429 if (idx == h1) { 430 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 431 } 432 } 433 } 434 #endif 435 /* A HASH table entry is found, so insert the values at the correct address */ 436 if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 437 else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 438 } 439 } else { 440 if (roworiented && !baij->donotstash) { 441 ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 442 } else { 443 if (!baij->donotstash) { 444 row = im[i]; 445 for ( j=0; j<n; j++ ) { 446 ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 447 } 448 } 449 } 450 } 451 } 452 #if defined(USE_PETSC_BOPT_g) 453 baij->ht_total_ct = total_ct; 454 baij->ht_insert_ct = insert_ct; 455 #endif 456 PetscFunctionReturn(0); 457 } 458 459 #undef __FUNC__ 460 #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ_HT" 461 int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 462 { 463 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 464 int ierr,i,j,ii,jj,row,col,k,l; 465 int roworiented = baij->roworiented,rstart=baij->rstart ; 466 int rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2; 467 int h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 468 double tmp; 469 Scalar ** HD = baij->hd,*value,*v_t,*baij_a; 470 #if defined(USE_PETSC_BOPT_g) 471 int total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 472 #endif 473 474 PetscFunctionBegin; 475 476 if (roworiented) { 477 stepval = (n-1)*bs; 478 } else { 479 stepval = (m-1)*bs; 480 } 481 for ( i=0; i<m; i++ ) { 482 #if defined(USE_PETSC_BOPT_g) 483 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 484 if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 485 #endif 486 row = im[i]; 487 v_t = v + i*bs2; 488 if (row >= rstart && row < rend) { 489 for ( j=0; j<n; j++ ) { 490 col = in[j]; 491 492 /* Look up into the Hash Table */ 493 key = row*Nbs+col+1; 494 h1 = HASH(size,key,tmp); 495 496 idx = h1; 497 #if defined(USE_PETSC_BOPT_g) 498 total_ct++; 499 insert_ct++; 500 if (HT[idx] != key) { 501 for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 502 if (idx == size) { 503 for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 504 if (idx == h1) { 505 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 506 } 507 } 508 } 509 #else 510 if (HT[idx] != key) { 511 for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++); 512 if (idx == size) { 513 for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++); 514 if (idx == h1) { 515 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 516 } 517 } 518 } 519 #endif 520 baij_a = HD[idx]; 521 if (roworiented) { 522 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 523 /* value = v + (i*(stepval+bs)+j)*bs; */ 524 value = v_t; 525 v_t += bs; 526 if (addv == ADD_VALUES) { 527 for ( ii=0; ii<bs; ii++,value+=stepval) { 528 for ( jj=ii; jj<bs2; jj+=bs ) { 529 baij_a[jj] += *value++; 530 } 531 } 532 } else { 533 for ( ii=0; ii<bs; ii++,value+=stepval) { 534 for ( jj=ii; jj<bs2; jj+=bs ) { 535 baij_a[jj] = *value++; 536 } 537 } 538 } 539 } else { 540 value = v + j*(stepval+bs)*bs + i*bs; 541 if (addv == ADD_VALUES) { 542 for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) { 543 for ( jj=0; jj<bs; jj++ ) { 544 baij_a[jj] += *value++; 545 } 546 } 547 } else { 548 for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) { 549 for ( jj=0; jj<bs; jj++ ) { 550 baij_a[jj] = *value++; 551 } 552 } 553 } 554 } 555 } 556 } else { 557 if (!baij->donotstash) { 558 if (roworiented ) { 559 row = im[i]*bs; 560 value = v + i*(stepval+bs)*bs; 561 for ( j=0; j<bs; j++,row++ ) { 562 for ( k=0; k<n; k++ ) { 563 for ( col=in[k]*bs,l=0; l<bs; l++,col++) { 564 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr); 565 } 566 } 567 } 568 } else { 569 for ( j=0; j<n; j++ ) { 570 value = v + j*(stepval+bs)*bs + i*bs; 571 col = in[j]*bs; 572 for ( k=0; k<bs; k++,col++,value+=stepval) { 573 for ( row = im[i]*bs,l=0; l<bs; l++,row++) { 574 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr); 575 } 576 } 577 } 578 } 579 } 580 } 581 } 582 #if defined(USE_PETSC_BOPT_g) 583 baij->ht_total_ct = total_ct; 584 baij->ht_insert_ct = insert_ct; 585 #endif 586 PetscFunctionReturn(0); 587 } 588 589 #undef __FUNC__ 590 #define __FUNC__ "MatGetValues_MPIBAIJ" 591 int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 592 { 593 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 594 int bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs; 595 int bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col; 596 597 PetscFunctionBegin; 598 for ( i=0; i<m; i++ ) { 599 if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 600 if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 601 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 602 row = idxm[i] - bsrstart; 603 for ( j=0; j<n; j++ ) { 604 if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column"); 605 if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large"); 606 if (idxn[j] >= bscstart && idxn[j] < bscend){ 607 col = idxn[j] - bscstart; 608 ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 609 } else { 610 if (!baij->colmap) { 611 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 612 } 613 if((baij->colmap[idxn[j]/bs]-1 < 0) || 614 (baij->garray[(baij->colmap[idxn[j]/bs]-1)/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 615 else { 616 col = (baij->colmap[idxn[j]/bs]-1) + idxn[j]%bs; 617 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 618 } 619 } 620 } 621 } else { 622 SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported"); 623 } 624 } 625 PetscFunctionReturn(0); 626 } 627 628 #undef __FUNC__ 629 #define __FUNC__ "MatNorm_MPIBAIJ" 630 int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm) 631 { 632 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 633 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data; 634 int ierr, i,bs2=baij->bs2; 635 double sum = 0.0; 636 Scalar *v; 637 638 PetscFunctionBegin; 639 if (baij->size == 1) { 640 ierr = MatNorm(baij->A,type,norm); CHKERRQ(ierr); 641 } else { 642 if (type == NORM_FROBENIUS) { 643 v = amat->a; 644 for (i=0; i<amat->nz*bs2; i++ ) { 645 #if defined(USE_PETSC_COMPLEX) 646 sum += PetscReal(PetscConj(*v)*(*v)); v++; 647 #else 648 sum += (*v)*(*v); v++; 649 #endif 650 } 651 v = bmat->a; 652 for (i=0; i<bmat->nz*bs2; i++ ) { 653 #if defined(USE_PETSC_COMPLEX) 654 sum += PetscReal(PetscConj(*v)*(*v)); v++; 655 #else 656 sum += (*v)*(*v); v++; 657 #endif 658 } 659 ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 660 *norm = sqrt(*norm); 661 } else { 662 SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet"); 663 } 664 } 665 PetscFunctionReturn(0); 666 } 667 668 #undef __FUNC__ 669 #define __FUNC__ "MatAssemblyBegin_MPIBAIJ" 670 int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 671 { 672 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 673 MPI_Comm comm = mat->comm; 674 int size = baij->size, *owners = baij->rowners,bs=baij->bs; 675 int rank = baij->rank,tag = mat->tag, *owner,*starts,count,ierr; 676 MPI_Request *send_waits,*recv_waits; 677 int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 678 InsertMode addv; 679 Scalar *rvalues,*svalues; 680 681 PetscFunctionBegin; 682 if (baij->donotstash) { 683 baij->svalues = 0; baij->rvalues = 0; 684 baij->nsends = 0; baij->nrecvs = 0; 685 baij->send_waits = 0; baij->recv_waits = 0; 686 baij->rmax = 0; 687 PetscFunctionReturn(0); 688 } 689 690 /* make sure all processors are either in INSERTMODE or ADDMODE */ 691 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr); 692 if (addv == (ADD_VALUES|INSERT_VALUES)) { 693 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added"); 694 } 695 mat->insertmode = addv; /* in case this processor had no cache */ 696 697 /* first count number of contributors to each processor */ 698 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 699 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 700 owner = (int *) PetscMalloc( (baij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 701 for ( i=0; i<baij->stash.n; i++ ) { 702 idx = baij->stash.idx[i]; 703 for ( j=0; j<size; j++ ) { 704 if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 705 nprocs[j]++; procs[j] = 1; owner[i] = j; break; 706 } 707 } 708 } 709 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 710 711 /* inform other processors of number of messages and max length*/ 712 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 713 ierr = MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 714 nreceives = work[rank]; 715 ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 716 nmax = work[rank]; 717 PetscFree(work); 718 719 /* post receives: 720 1) each message will consist of ordered pairs 721 (global index,value) we store the global index as a double 722 to simplify the message passing. 723 2) since we don't know how long each individual message is we 724 allocate the largest needed buffer for each receive. Potentially 725 this is a lot of wasted space. 726 727 728 This could be done better. 729 */ 730 rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));CHKPTRQ(rvalues); 731 recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 732 for ( i=0; i<nreceives; i++ ) { 733 ierr = MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 734 } 735 736 /* do sends: 737 1) starts[i] gives the starting index in svalues for stuff going to 738 the ith processor 739 */ 740 svalues = (Scalar *) PetscMalloc(3*(baij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 741 send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 742 starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 743 starts[0] = 0; 744 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 745 for ( i=0; i<baij->stash.n; i++ ) { 746 svalues[3*starts[owner[i]]] = (Scalar) baij->stash.idx[i]; 747 svalues[3*starts[owner[i]]+1] = (Scalar) baij->stash.idy[i]; 748 svalues[3*(starts[owner[i]]++)+2] = baij->stash.array[i]; 749 } 750 PetscFree(owner); 751 starts[0] = 0; 752 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 753 count = 0; 754 for ( i=0; i<size; i++ ) { 755 if (procs[i]) { 756 ierr = MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 757 } 758 } 759 PetscFree(starts); PetscFree(nprocs); 760 761 /* Free cache space */ 762 PLogInfo(baij->A,"MatAssemblyBegin_MPIBAIJ:Number of off-processor values %d\n",baij->stash.n); 763 ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr); 764 765 baij->svalues = svalues; baij->rvalues = rvalues; 766 baij->nsends = nsends; baij->nrecvs = nreceives; 767 baij->send_waits = send_waits; baij->recv_waits = recv_waits; 768 baij->rmax = nmax; 769 770 PetscFunctionReturn(0); 771 } 772 773 /* 774 Creates the hash table, and sets the table 775 This table is created only once. 776 If new entried need to be added to the matrix 777 then the hash table has to be destroyed and 778 recreated. 779 */ 780 #undef __FUNC__ 781 #define __FUNC__ "MatCreateHashTable_MPIBAIJ_Private" 782 int MatCreateHashTable_MPIBAIJ_Private(Mat mat,double factor) 783 { 784 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 785 Mat A = baij->A, B=baij->B; 786 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data, *b=(Mat_SeqBAIJ *)B->data; 787 int i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 788 int size,bs2=baij->bs2,rstart=baij->rstart; 789 int cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs; 790 int *HT,key; 791 Scalar **HD; 792 double tmp; 793 #if defined(USE_PETSC_BOPT_g) 794 int ct=0,max=0; 795 #endif 796 797 PetscFunctionBegin; 798 baij->ht_size=(int)(factor*nz); 799 size = baij->ht_size; 800 801 if (baij->ht) { 802 PetscFunctionReturn(0); 803 } 804 805 /* Allocate Memory for Hash Table */ 806 baij->hd = (Scalar**)PetscMalloc((size)*(sizeof(int)+sizeof(Scalar*))+1); CHKPTRQ(baij->hd); 807 baij->ht = (int*)(baij->hd + size); 808 HD = baij->hd; 809 HT = baij->ht; 810 811 812 PetscMemzero(HD,size*(sizeof(int)+sizeof(Scalar*))); 813 814 815 /* Loop Over A */ 816 for ( i=0; i<a->mbs; i++ ) { 817 for ( j=ai[i]; j<ai[i+1]; j++ ) { 818 row = i+rstart; 819 col = aj[j]+cstart; 820 821 key = row*Nbs + col + 1; 822 h1 = HASH(size,key,tmp); 823 for ( k=0; k<size; k++ ){ 824 if (HT[(h1+k)%size] == 0.0) { 825 HT[(h1+k)%size] = key; 826 HD[(h1+k)%size] = a->a + j*bs2; 827 break; 828 #if defined(USE_PETSC_BOPT_g) 829 } else { 830 ct++; 831 #endif 832 } 833 } 834 #if defined(USE_PETSC_BOPT_g) 835 if (k> max) max = k; 836 #endif 837 } 838 } 839 /* Loop Over B */ 840 for ( i=0; i<b->mbs; i++ ) { 841 for ( j=bi[i]; j<bi[i+1]; j++ ) { 842 row = i+rstart; 843 col = garray[bj[j]]; 844 key = row*Nbs + col + 1; 845 h1 = HASH(size,key,tmp); 846 for ( k=0; k<size; k++ ){ 847 if (HT[(h1+k)%size] == 0.0) { 848 HT[(h1+k)%size] = key; 849 HD[(h1+k)%size] = b->a + j*bs2; 850 break; 851 #if defined(USE_PETSC_BOPT_g) 852 } else { 853 ct++; 854 #endif 855 } 856 } 857 #if defined(USE_PETSC_BOPT_g) 858 if (k> max) max = k; 859 #endif 860 } 861 } 862 863 /* Print Summary */ 864 #if defined(USE_PETSC_BOPT_g) 865 for ( i=0,j=0; i<size; i++) 866 if (HT[i]) {j++;} 867 PLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n", 868 (j== 0)? 0.0:((double)(ct+j))/j,max); 869 #endif 870 PetscFunctionReturn(0); 871 } 872 873 #undef __FUNC__ 874 #define __FUNC__ "MatAssemblyEnd_MPIBAIJ" 875 int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 876 { 877 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 878 MPI_Status *send_status,recv_status; 879 int imdex,nrecvs = baij->nrecvs, count = nrecvs, i, n, ierr; 880 int bs=baij->bs,row,col,other_disassembled; 881 Scalar *values,val; 882 InsertMode addv = mat->insertmode; 883 884 PetscFunctionBegin; 885 /* wait on receives */ 886 while (count) { 887 ierr = MPI_Waitany(nrecvs,baij->recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 888 /* unpack receives into our local space */ 889 values = baij->rvalues + 3*imdex*baij->rmax; 890 ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,&n);CHKERRQ(ierr); 891 n = n/3; 892 for ( i=0; i<n; i++ ) { 893 row = (int) PetscReal(values[3*i]) - baij->rstart*bs; 894 col = (int) PetscReal(values[3*i+1]); 895 val = values[3*i+2]; 896 if (col >= baij->cstart*bs && col < baij->cend*bs) { 897 col -= baij->cstart*bs; 898 ierr = MatSetValues(baij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr) 899 } else { 900 if (mat->was_assembled) { 901 if (!baij->colmap) { 902 ierr = CreateColmap_MPIBAIJ_Private(mat); CHKERRQ(ierr); 903 } 904 col = (baij->colmap[col/bs]) - 1 + col%bs; 905 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 906 ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 907 col = (int) PetscReal(values[3*i+1]); 908 } 909 } 910 ierr = MatSetValues(baij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr) 911 } 912 } 913 count--; 914 } 915 if (baij->recv_waits) PetscFree(baij->recv_waits); 916 if (baij->rvalues) PetscFree(baij->rvalues); 917 918 /* wait on sends */ 919 if (baij->nsends) { 920 send_status = (MPI_Status *) PetscMalloc(baij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 921 ierr = MPI_Waitall(baij->nsends,baij->send_waits,send_status);CHKERRQ(ierr); 922 PetscFree(send_status); 923 } 924 if (baij->send_waits) PetscFree(baij->send_waits); 925 if (baij->svalues) PetscFree(baij->svalues); 926 927 ierr = MatAssemblyBegin(baij->A,mode); CHKERRQ(ierr); 928 ierr = MatAssemblyEnd(baij->A,mode); CHKERRQ(ierr); 929 930 /* determine if any processor has disassembled, if so we must 931 also disassemble ourselfs, in order that we may reassemble. */ 932 /* 933 if nonzero structure of submatrix B cannot change then we know that 934 no processor disassembled thus we can skip this stuff 935 */ 936 if (!((Mat_SeqBAIJ*) baij->B->data)->nonew) { 937 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 938 if (mat->was_assembled && !other_disassembled) { 939 ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 940 } 941 } 942 943 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 944 ierr = MatSetUpMultiply_MPIBAIJ(mat); CHKERRQ(ierr); 945 } 946 ierr = MatAssemblyBegin(baij->B,mode); CHKERRQ(ierr); 947 ierr = MatAssemblyEnd(baij->B,mode); CHKERRQ(ierr); 948 949 #if defined(USE_PETSC_BOPT_g) 950 if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 951 PLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n", 952 ((double)baij->ht_total_ct)/baij->ht_insert_ct); 953 baij->ht_total_ct = 0; 954 baij->ht_insert_ct = 0; 955 } 956 #endif 957 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 958 ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact); CHKERRQ(ierr); 959 mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 960 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 961 } 962 963 if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;} 964 PetscFunctionReturn(0); 965 } 966 967 #undef __FUNC__ 968 #define __FUNC__ "MatView_MPIBAIJ_Binary" 969 static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer) 970 { 971 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 972 int ierr; 973 974 PetscFunctionBegin; 975 if (baij->size == 1) { 976 ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 977 } else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported"); 978 PetscFunctionReturn(0); 979 } 980 981 #undef __FUNC__ 982 #define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworMatlab" 983 static int MatView_MPIBAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer) 984 { 985 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 986 int ierr, format,rank,bs = baij->bs; 987 FILE *fd; 988 ViewerType vtype; 989 990 PetscFunctionBegin; 991 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 992 if (vtype == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) { 993 ierr = ViewerGetFormat(viewer,&format); 994 if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 995 MatInfo info; 996 MPI_Comm_rank(mat->comm,&rank); 997 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 998 ierr = MatGetInfo(mat,MAT_LOCAL,&info); 999 PetscSequentialPhaseBegin(mat->comm,1); 1000 fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n", 1001 rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs, 1002 baij->bs,(int)info.memory); 1003 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info); 1004 fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs); 1005 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info); 1006 fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs); 1007 fflush(fd); 1008 PetscSequentialPhaseEnd(mat->comm,1); 1009 ierr = VecScatterView(baij->Mvctx,viewer); CHKERRQ(ierr); 1010 PetscFunctionReturn(0); 1011 } else if (format == VIEWER_FORMAT_ASCII_INFO) { 1012 PetscPrintf(mat->comm," block size is %d\n",bs); 1013 PetscFunctionReturn(0); 1014 } 1015 } 1016 1017 if (vtype == DRAW_VIEWER) { 1018 Draw draw; 1019 PetscTruth isnull; 1020 ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr); 1021 ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1022 } 1023 1024 if (vtype == ASCII_FILE_VIEWER) { 1025 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 1026 PetscSequentialPhaseBegin(mat->comm,1); 1027 fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 1028 baij->rank,baij->m,baij->rstart*bs,baij->rend*bs,baij->n, 1029 baij->cstart*bs,baij->cend*bs); 1030 ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 1031 ierr = MatView(baij->B,viewer); CHKERRQ(ierr); 1032 fflush(fd); 1033 PetscSequentialPhaseEnd(mat->comm,1); 1034 } else { 1035 int size = baij->size; 1036 rank = baij->rank; 1037 if (size == 1) { 1038 ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 1039 } else { 1040 /* assemble the entire matrix onto first processor. */ 1041 Mat A; 1042 Mat_SeqBAIJ *Aloc; 1043 int M = baij->M, N = baij->N,*ai,*aj,col,i,j,k,*rvals; 1044 int mbs=baij->mbs; 1045 Scalar *a; 1046 1047 if (!rank) { 1048 ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 1049 } else { 1050 ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 1051 } 1052 PLogObjectParent(mat,A); 1053 1054 /* copy over the A part */ 1055 Aloc = (Mat_SeqBAIJ*) baij->A->data; 1056 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1057 rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals); 1058 1059 for ( i=0; i<mbs; i++ ) { 1060 rvals[0] = bs*(baij->rstart + i); 1061 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 1062 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1063 col = (baij->cstart+aj[j])*bs; 1064 for (k=0; k<bs; k++ ) { 1065 ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1066 col++; a += bs; 1067 } 1068 } 1069 } 1070 /* copy over the B part */ 1071 Aloc = (Mat_SeqBAIJ*) baij->B->data; 1072 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1073 for ( i=0; i<mbs; i++ ) { 1074 rvals[0] = bs*(baij->rstart + i); 1075 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 1076 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1077 col = baij->garray[aj[j]]*bs; 1078 for (k=0; k<bs; k++ ) { 1079 ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1080 col++; a += bs; 1081 } 1082 } 1083 } 1084 PetscFree(rvals); 1085 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1086 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1087 /* 1088 Everyone has to call to draw the matrix since the graphics waits are 1089 synchronized across all processors that share the Draw object 1090 */ 1091 if (!rank || vtype == DRAW_VIEWER) { 1092 ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 1093 } 1094 ierr = MatDestroy(A); CHKERRQ(ierr); 1095 } 1096 } 1097 PetscFunctionReturn(0); 1098 } 1099 1100 1101 1102 #undef __FUNC__ 1103 #define __FUNC__ "MatView_MPIBAIJ" 1104 int MatView_MPIBAIJ(Mat mat,Viewer viewer) 1105 { 1106 int ierr; 1107 ViewerType vtype; 1108 1109 PetscFunctionBegin; 1110 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 1111 if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER || 1112 vtype == DRAW_VIEWER || vtype == MATLAB_VIEWER) { 1113 ierr = MatView_MPIBAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 1114 } else if (vtype == BINARY_FILE_VIEWER) { 1115 ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1116 } else { 1117 SETERRQ(1,1,"Viewer type not supported by PETSc object"); 1118 } 1119 PetscFunctionReturn(0); 1120 } 1121 1122 #undef __FUNC__ 1123 #define __FUNC__ "MatDestroy_MPIBAIJ" 1124 int MatDestroy_MPIBAIJ(Mat mat) 1125 { 1126 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1127 int ierr; 1128 1129 PetscFunctionBegin; 1130 if (--mat->refct > 0) PetscFunctionReturn(0); 1131 1132 if (mat->mapping) { 1133 ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr); 1134 } 1135 if (mat->bmapping) { 1136 ierr = ISLocalToGlobalMappingDestroy(mat->bmapping); CHKERRQ(ierr); 1137 } 1138 #if defined(USE_PETSC_LOG) 1139 PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",baij->M,baij->N); 1140 #endif 1141 1142 ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr); 1143 PetscFree(baij->rowners); 1144 ierr = MatDestroy(baij->A); CHKERRQ(ierr); 1145 ierr = MatDestroy(baij->B); CHKERRQ(ierr); 1146 if (baij->colmap) PetscFree(baij->colmap); 1147 if (baij->garray) PetscFree(baij->garray); 1148 if (baij->lvec) VecDestroy(baij->lvec); 1149 if (baij->Mvctx) VecScatterDestroy(baij->Mvctx); 1150 if (baij->rowvalues) PetscFree(baij->rowvalues); 1151 if (baij->barray) PetscFree(baij->barray); 1152 if (baij->hd) PetscFree(baij->hd); 1153 PetscFree(baij); 1154 PLogObjectDestroy(mat); 1155 PetscHeaderDestroy(mat); 1156 PetscFunctionReturn(0); 1157 } 1158 1159 #undef __FUNC__ 1160 #define __FUNC__ "MatMult_MPIBAIJ" 1161 int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1162 { 1163 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1164 int ierr, nt; 1165 1166 PetscFunctionBegin; 1167 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1168 if (nt != a->n) { 1169 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx"); 1170 } 1171 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1172 if (nt != a->m) { 1173 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible parition of A and yy"); 1174 } 1175 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1176 ierr = (*a->A->ops->mult)(a->A,xx,yy); CHKERRQ(ierr); 1177 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1178 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 1179 ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1180 PetscFunctionReturn(0); 1181 } 1182 1183 #undef __FUNC__ 1184 #define __FUNC__ "MatMultAdd_MPIBAIJ" 1185 int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1186 { 1187 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1188 int ierr; 1189 1190 PetscFunctionBegin; 1191 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1192 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 1193 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1194 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 1195 PetscFunctionReturn(0); 1196 } 1197 1198 #undef __FUNC__ 1199 #define __FUNC__ "MatMultTrans_MPIBAIJ" 1200 int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy) 1201 { 1202 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1203 int ierr; 1204 1205 PetscFunctionBegin; 1206 /* do nondiagonal part */ 1207 ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 1208 /* send it on its way */ 1209 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1210 /* do local part */ 1211 ierr = (*a->A->ops->multtrans)(a->A,xx,yy); CHKERRQ(ierr); 1212 /* receive remote parts: note this assumes the values are not actually */ 1213 /* inserted in yy until the next line, which is true for my implementation*/ 1214 /* but is not perhaps always true. */ 1215 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1216 PetscFunctionReturn(0); 1217 } 1218 1219 #undef __FUNC__ 1220 #define __FUNC__ "MatMultTransAdd_MPIBAIJ" 1221 int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1222 { 1223 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1224 int ierr; 1225 1226 PetscFunctionBegin; 1227 /* do nondiagonal part */ 1228 ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 1229 /* send it on its way */ 1230 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 1231 /* do local part */ 1232 ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 1233 /* receive remote parts: note this assumes the values are not actually */ 1234 /* inserted in yy until the next line, which is true for my implementation*/ 1235 /* but is not perhaps always true. */ 1236 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 1237 PetscFunctionReturn(0); 1238 } 1239 1240 /* 1241 This only works correctly for square matrices where the subblock A->A is the 1242 diagonal block 1243 */ 1244 #undef __FUNC__ 1245 #define __FUNC__ "MatGetDiagonal_MPIBAIJ" 1246 int MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1247 { 1248 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1249 int ierr; 1250 1251 PetscFunctionBegin; 1252 if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block"); 1253 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1254 PetscFunctionReturn(0); 1255 } 1256 1257 #undef __FUNC__ 1258 #define __FUNC__ "MatScale_MPIBAIJ" 1259 int MatScale_MPIBAIJ(Scalar *aa,Mat A) 1260 { 1261 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1262 int ierr; 1263 1264 PetscFunctionBegin; 1265 ierr = MatScale(aa,a->A); CHKERRQ(ierr); 1266 ierr = MatScale(aa,a->B); CHKERRQ(ierr); 1267 PetscFunctionReturn(0); 1268 } 1269 1270 #undef __FUNC__ 1271 #define __FUNC__ "MatGetSize_MPIBAIJ" 1272 int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n) 1273 { 1274 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1275 1276 PetscFunctionBegin; 1277 if (m) *m = mat->M; 1278 if (n) *n = mat->N; 1279 PetscFunctionReturn(0); 1280 } 1281 1282 #undef __FUNC__ 1283 #define __FUNC__ "MatGetLocalSize_MPIBAIJ" 1284 int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n) 1285 { 1286 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1287 1288 PetscFunctionBegin; 1289 *m = mat->m; *n = mat->n; 1290 PetscFunctionReturn(0); 1291 } 1292 1293 #undef __FUNC__ 1294 #define __FUNC__ "MatGetOwnershipRange_MPIBAIJ" 1295 int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n) 1296 { 1297 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1298 1299 PetscFunctionBegin; 1300 *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs; 1301 PetscFunctionReturn(0); 1302 } 1303 1304 extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 1305 extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 1306 1307 #undef __FUNC__ 1308 #define __FUNC__ "MatGetRow_MPIBAIJ" 1309 int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 1310 { 1311 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1312 Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 1313 int bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB; 1314 int nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs; 1315 int *cmap, *idx_p,cstart = mat->cstart; 1316 1317 PetscFunctionBegin; 1318 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active"); 1319 mat->getrowactive = PETSC_TRUE; 1320 1321 if (!mat->rowvalues && (idx || v)) { 1322 /* 1323 allocate enough space to hold information from the longest row. 1324 */ 1325 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data; 1326 int max = 1,mbs = mat->mbs,tmp; 1327 for ( i=0; i<mbs; i++ ) { 1328 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1329 if (max < tmp) { max = tmp; } 1330 } 1331 mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar))); 1332 CHKPTRQ(mat->rowvalues); 1333 mat->rowindices = (int *) (mat->rowvalues + max*bs2); 1334 } 1335 1336 if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,0,"Only local rows") 1337 lrow = row - brstart; 1338 1339 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1340 if (!v) {pvA = 0; pvB = 0;} 1341 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1342 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1343 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1344 nztot = nzA + nzB; 1345 1346 cmap = mat->garray; 1347 if (v || idx) { 1348 if (nztot) { 1349 /* Sort by increasing column numbers, assuming A and B already sorted */ 1350 int imark = -1; 1351 if (v) { 1352 *v = v_p = mat->rowvalues; 1353 for ( i=0; i<nzB; i++ ) { 1354 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1355 else break; 1356 } 1357 imark = i; 1358 for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i]; 1359 for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = vworkB[i]; 1360 } 1361 if (idx) { 1362 *idx = idx_p = mat->rowindices; 1363 if (imark > -1) { 1364 for ( i=0; i<imark; i++ ) { 1365 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1366 } 1367 } else { 1368 for ( i=0; i<nzB; i++ ) { 1369 if (cmap[cworkB[i]/bs] < cstart) 1370 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1371 else break; 1372 } 1373 imark = i; 1374 } 1375 for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart*bs + cworkA[i]; 1376 for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1377 } 1378 } else { 1379 if (idx) *idx = 0; 1380 if (v) *v = 0; 1381 } 1382 } 1383 *nz = nztot; 1384 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1385 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1386 PetscFunctionReturn(0); 1387 } 1388 1389 #undef __FUNC__ 1390 #define __FUNC__ "MatRestoreRow_MPIBAIJ" 1391 int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1392 { 1393 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1394 1395 PetscFunctionBegin; 1396 if (baij->getrowactive == PETSC_FALSE) { 1397 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called"); 1398 } 1399 baij->getrowactive = PETSC_FALSE; 1400 PetscFunctionReturn(0); 1401 } 1402 1403 #undef __FUNC__ 1404 #define __FUNC__ "MatGetBlockSize_MPIBAIJ" 1405 int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs) 1406 { 1407 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1408 1409 PetscFunctionBegin; 1410 *bs = baij->bs; 1411 PetscFunctionReturn(0); 1412 } 1413 1414 #undef __FUNC__ 1415 #define __FUNC__ "MatZeroEntries_MPIBAIJ" 1416 int MatZeroEntries_MPIBAIJ(Mat A) 1417 { 1418 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 1419 int ierr; 1420 1421 PetscFunctionBegin; 1422 ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 1423 ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 1424 PetscFunctionReturn(0); 1425 } 1426 1427 #undef __FUNC__ 1428 #define __FUNC__ "MatGetInfo_MPIBAIJ" 1429 int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1430 { 1431 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) matin->data; 1432 Mat A = a->A, B = a->B; 1433 int ierr; 1434 double isend[5], irecv[5]; 1435 1436 PetscFunctionBegin; 1437 info->block_size = (double)a->bs; 1438 ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr); 1439 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->memory; 1440 ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr); 1441 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->memory; 1442 if (flag == MAT_LOCAL) { 1443 info->nz_used = isend[0]; 1444 info->nz_allocated = isend[1]; 1445 info->nz_unneeded = isend[2]; 1446 info->memory = isend[3]; 1447 info->mallocs = isend[4]; 1448 } else if (flag == MAT_GLOBAL_MAX) { 1449 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr); 1450 info->nz_used = irecv[0]; 1451 info->nz_allocated = irecv[1]; 1452 info->nz_unneeded = irecv[2]; 1453 info->memory = irecv[3]; 1454 info->mallocs = irecv[4]; 1455 } else if (flag == MAT_GLOBAL_SUM) { 1456 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr); 1457 info->nz_used = irecv[0]; 1458 info->nz_allocated = irecv[1]; 1459 info->nz_unneeded = irecv[2]; 1460 info->memory = irecv[3]; 1461 info->mallocs = irecv[4]; 1462 } 1463 info->rows_global = (double)a->M; 1464 info->columns_global = (double)a->N; 1465 info->rows_local = (double)a->m; 1466 info->columns_local = (double)a->N; 1467 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1468 info->fill_ratio_needed = 0; 1469 info->factor_mallocs = 0; 1470 PetscFunctionReturn(0); 1471 } 1472 1473 #undef __FUNC__ 1474 #define __FUNC__ "MatSetOption_MPIBAIJ" 1475 int MatSetOption_MPIBAIJ(Mat A,MatOption op) 1476 { 1477 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1478 1479 PetscFunctionBegin; 1480 if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 1481 op == MAT_YES_NEW_NONZERO_LOCATIONS || 1482 op == MAT_COLUMNS_UNSORTED || 1483 op == MAT_COLUMNS_SORTED || 1484 op == MAT_NEW_NONZERO_ALLOCATION_ERROR || 1485 op == MAT_NEW_NONZERO_LOCATION_ERROR) { 1486 MatSetOption(a->A,op); 1487 MatSetOption(a->B,op); 1488 } else if (op == MAT_ROW_ORIENTED) { 1489 a->roworiented = 1; 1490 MatSetOption(a->A,op); 1491 MatSetOption(a->B,op); 1492 } else if (op == MAT_ROWS_SORTED || 1493 op == MAT_ROWS_UNSORTED || 1494 op == MAT_SYMMETRIC || 1495 op == MAT_STRUCTURALLY_SYMMETRIC || 1496 op == MAT_YES_NEW_DIAGONALS || 1497 op == MAT_USE_HASH_TABLE) 1498 PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 1499 else if (op == MAT_COLUMN_ORIENTED) { 1500 a->roworiented = 0; 1501 MatSetOption(a->A,op); 1502 MatSetOption(a->B,op); 1503 } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 1504 a->donotstash = 1; 1505 } else if (op == MAT_NO_NEW_DIAGONALS) { 1506 SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 1507 } else if (op == MAT_USE_HASH_TABLE) { 1508 a->ht_flag = 1; 1509 } else { 1510 SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 1511 } 1512 PetscFunctionReturn(0); 1513 } 1514 1515 #undef __FUNC__ 1516 #define __FUNC__ "MatTranspose_MPIBAIJ(" 1517 int MatTranspose_MPIBAIJ(Mat A,Mat *matout) 1518 { 1519 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data; 1520 Mat_SeqBAIJ *Aloc; 1521 Mat B; 1522 int ierr,M=baij->M,N=baij->N,*ai,*aj,i,*rvals,j,k,col; 1523 int bs=baij->bs,mbs=baij->mbs; 1524 Scalar *a; 1525 1526 PetscFunctionBegin; 1527 if (matout == PETSC_NULL && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place"); 1528 ierr = MatCreateMPIBAIJ(A->comm,baij->bs,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,PETSC_NULL,&B); 1529 CHKERRQ(ierr); 1530 1531 /* copy over the A part */ 1532 Aloc = (Mat_SeqBAIJ*) baij->A->data; 1533 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1534 rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals); 1535 1536 for ( i=0; i<mbs; i++ ) { 1537 rvals[0] = bs*(baij->rstart + i); 1538 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 1539 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1540 col = (baij->cstart+aj[j])*bs; 1541 for (k=0; k<bs; k++ ) { 1542 ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1543 col++; a += bs; 1544 } 1545 } 1546 } 1547 /* copy over the B part */ 1548 Aloc = (Mat_SeqBAIJ*) baij->B->data; 1549 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1550 for ( i=0; i<mbs; i++ ) { 1551 rvals[0] = bs*(baij->rstart + i); 1552 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 1553 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1554 col = baij->garray[aj[j]]*bs; 1555 for (k=0; k<bs; k++ ) { 1556 ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1557 col++; a += bs; 1558 } 1559 } 1560 } 1561 PetscFree(rvals); 1562 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1563 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1564 1565 if (matout != PETSC_NULL) { 1566 *matout = B; 1567 } else { 1568 PetscOps *Abops; 1569 struct _MatOps *Aops; 1570 1571 /* This isn't really an in-place transpose .... but free data structures from baij */ 1572 PetscFree(baij->rowners); 1573 ierr = MatDestroy(baij->A); CHKERRQ(ierr); 1574 ierr = MatDestroy(baij->B); CHKERRQ(ierr); 1575 if (baij->colmap) PetscFree(baij->colmap); 1576 if (baij->garray) PetscFree(baij->garray); 1577 if (baij->lvec) VecDestroy(baij->lvec); 1578 if (baij->Mvctx) VecScatterDestroy(baij->Mvctx); 1579 PetscFree(baij); 1580 1581 /* 1582 This is horrible, horrible code. We need to keep the 1583 A pointers for the bops and ops but copy everything 1584 else from C. 1585 */ 1586 Abops = A->bops; 1587 Aops = A->ops; 1588 PetscMemcpy(A,B,sizeof(struct _p_Mat)); 1589 A->bops = Abops; 1590 A->ops = Aops; 1591 1592 PetscHeaderDestroy(B); 1593 } 1594 PetscFunctionReturn(0); 1595 } 1596 1597 #undef __FUNC__ 1598 #define __FUNC__ "MatDiagonalScale_MPIBAIJ" 1599 int MatDiagonalScale_MPIBAIJ(Mat A,Vec ll,Vec rr) 1600 { 1601 Mat a = ((Mat_MPIBAIJ *) A->data)->A; 1602 Mat b = ((Mat_MPIBAIJ *) A->data)->B; 1603 int ierr,s1,s2,s3; 1604 1605 PetscFunctionBegin; 1606 if (ll) { 1607 ierr = VecGetLocalSize(ll,&s1); CHKERRQ(ierr); 1608 ierr = MatGetLocalSize(A,&s2,&s3); CHKERRQ(ierr); 1609 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"non-conforming local sizes"); 1610 ierr = MatDiagonalScale(a,ll,0); CHKERRQ(ierr); 1611 ierr = MatDiagonalScale(b,ll,0); CHKERRQ(ierr); 1612 } 1613 if (rr) SETERRQ(PETSC_ERR_SUP,0,"not supported for right vector"); 1614 PetscFunctionReturn(0); 1615 } 1616 1617 #undef __FUNC__ 1618 #define __FUNC__ "MatZeroRows_MPIBAIJ" 1619 int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag) 1620 { 1621 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 1622 int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 1623 int *procs,*nprocs,j,found,idx,nsends,*work,row; 1624 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 1625 int *rvalues,tag = A->tag,count,base,slen,n,*source; 1626 int *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs; 1627 MPI_Comm comm = A->comm; 1628 MPI_Request *send_waits,*recv_waits; 1629 MPI_Status recv_status,*send_status; 1630 IS istmp; 1631 PetscTruth localdiag; 1632 1633 PetscFunctionBegin; 1634 ierr = ISGetSize(is,&N); CHKERRQ(ierr); 1635 ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 1636 1637 /* first count number of contributors to each processor */ 1638 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 1639 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 1640 owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 1641 for ( i=0; i<N; i++ ) { 1642 idx = rows[i]; 1643 found = 0; 1644 for ( j=0; j<size; j++ ) { 1645 if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 1646 nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 1647 } 1648 } 1649 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range"); 1650 } 1651 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 1652 1653 /* inform other processors of number of messages and max length*/ 1654 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 1655 ierr = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 1656 nrecvs = work[rank]; 1657 ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 1658 nmax = work[rank]; 1659 PetscFree(work); 1660 1661 /* post receives: */ 1662 rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); CHKPTRQ(rvalues); 1663 recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 1664 for ( i=0; i<nrecvs; i++ ) { 1665 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 1666 } 1667 1668 /* do sends: 1669 1) starts[i] gives the starting index in svalues for stuff going to 1670 the ith processor 1671 */ 1672 svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 1673 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 1674 starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 1675 starts[0] = 0; 1676 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 1677 for ( i=0; i<N; i++ ) { 1678 svalues[starts[owner[i]]++] = rows[i]; 1679 } 1680 ISRestoreIndices(is,&rows); 1681 1682 starts[0] = 0; 1683 for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 1684 count = 0; 1685 for ( i=0; i<size; i++ ) { 1686 if (procs[i]) { 1687 ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1688 } 1689 } 1690 PetscFree(starts); 1691 1692 base = owners[rank]*bs; 1693 1694 /* wait on receives */ 1695 lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 1696 source = lens + nrecvs; 1697 count = nrecvs; slen = 0; 1698 while (count) { 1699 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1700 /* unpack receives into our local space */ 1701 ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 1702 source[imdex] = recv_status.MPI_SOURCE; 1703 lens[imdex] = n; 1704 slen += n; 1705 count--; 1706 } 1707 PetscFree(recv_waits); 1708 1709 /* move the data into the send scatter */ 1710 lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 1711 count = 0; 1712 for ( i=0; i<nrecvs; i++ ) { 1713 values = rvalues + i*nmax; 1714 for ( j=0; j<lens[i]; j++ ) { 1715 lrows[count++] = values[j] - base; 1716 } 1717 } 1718 PetscFree(rvalues); PetscFree(lens); 1719 PetscFree(owner); PetscFree(nprocs); 1720 1721 /* actually zap the local rows */ 1722 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 1723 PLogObjectParent(A,istmp); 1724 1725 /* 1726 Zero the required rows. If the "diagonal block" of the matrix 1727 is square and the user wishes to set the diagonal we use seperate 1728 code so that MatSetValues() is not called for each diagonal allocating 1729 new memory, thus calling lots of mallocs and slowing things down. 1730 1731 Contributed by: Mathew Knepley 1732 */ 1733 localdiag = PETSC_FALSE; 1734 if (diag && (l->A->M == l->A->N)) { 1735 localdiag = PETSC_TRUE; 1736 ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 1737 } else { 1738 ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr); 1739 } 1740 ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 1741 ierr = ISDestroy(istmp); CHKERRQ(ierr); 1742 1743 if (diag && (localdiag == PETSC_FALSE)) { 1744 for ( i = 0; i < slen; i++ ) { 1745 row = lrows[i] + rstart_bs; 1746 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES); CHKERRQ(ierr); 1747 } 1748 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1749 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1750 } 1751 PetscFree(lrows); 1752 1753 /* wait on sends */ 1754 if (nsends) { 1755 send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 1756 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1757 PetscFree(send_status); 1758 } 1759 PetscFree(send_waits); PetscFree(svalues); 1760 1761 PetscFunctionReturn(0); 1762 } 1763 1764 extern int MatPrintHelp_SeqBAIJ(Mat); 1765 #undef __FUNC__ 1766 #define __FUNC__ "MatPrintHelp_MPIBAIJ" 1767 int MatPrintHelp_MPIBAIJ(Mat A) 1768 { 1769 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 1770 MPI_Comm comm = A->comm; 1771 static int called = 0; 1772 int ierr; 1773 1774 PetscFunctionBegin; 1775 if (!a->rank) { 1776 ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr); 1777 } 1778 if (called) {PetscFunctionReturn(0);} else called = 1; 1779 (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n"); 1780 (*PetscHelpPrintf)(comm," -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n"); 1781 PetscFunctionReturn(0); 1782 } 1783 1784 #undef __FUNC__ 1785 #define __FUNC__ "MatSetUnfactored_MPIBAIJ" 1786 int MatSetUnfactored_MPIBAIJ(Mat A) 1787 { 1788 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 1789 int ierr; 1790 1791 PetscFunctionBegin; 1792 ierr = MatSetUnfactored(a->A); CHKERRQ(ierr); 1793 PetscFunctionReturn(0); 1794 } 1795 1796 static int MatConvertSameType_MPIBAIJ(Mat,Mat *,int); 1797 1798 /* -------------------------------------------------------------------*/ 1799 static struct _MatOps MatOps = { 1800 MatSetValues_MPIBAIJ,MatGetRow_MPIBAIJ,MatRestoreRow_MPIBAIJ,MatMult_MPIBAIJ, 1801 MatMultAdd_MPIBAIJ,MatMultTrans_MPIBAIJ,MatMultTransAdd_MPIBAIJ,0, 1802 0,0,0,0, 1803 0,0,MatTranspose_MPIBAIJ,MatGetInfo_MPIBAIJ, 1804 0,MatGetDiagonal_MPIBAIJ,MatDiagonalScale_MPIBAIJ,MatNorm_MPIBAIJ, 1805 MatAssemblyBegin_MPIBAIJ,MatAssemblyEnd_MPIBAIJ,0,MatSetOption_MPIBAIJ, 1806 MatZeroEntries_MPIBAIJ,MatZeroRows_MPIBAIJ,0, 1807 0,0,0,MatGetSize_MPIBAIJ, 1808 MatGetLocalSize_MPIBAIJ,MatGetOwnershipRange_MPIBAIJ,0,0, 1809 0,0,MatConvertSameType_MPIBAIJ,0,0, 1810 0,0,0,MatGetSubMatrices_MPIBAIJ, 1811 MatIncreaseOverlap_MPIBAIJ,MatGetValues_MPIBAIJ,0,MatPrintHelp_MPIBAIJ, 1812 MatScale_MPIBAIJ,0,0,0,MatGetBlockSize_MPIBAIJ, 1813 0,0,0,0,0,0,MatSetUnfactored_MPIBAIJ,0,MatSetValuesBlocked_MPIBAIJ}; 1814 1815 1816 #undef __FUNC__ 1817 #define __FUNC__ "MatCreateMPIBAIJ" 1818 /*@C 1819 MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 1820 (block compressed row). For good matrix assembly performance 1821 the user should preallocate the matrix storage by setting the parameters 1822 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1823 performance can be increased by more than a factor of 50. 1824 1825 Collective on MPI_Comm 1826 1827 Input Parameters: 1828 + comm - MPI communicator 1829 . bs - size of blockk 1830 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1831 This value should be the same as the local size used in creating the 1832 y vector for the matrix-vector product y = Ax. 1833 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1834 This value should be the same as the local size used in creating the 1835 x vector for the matrix-vector product y = Ax. 1836 . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 1837 . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 1838 . d_nz - number of block nonzeros per block row in diagonal portion of local 1839 submatrix (same for all local rows) 1840 . d_nzz - array containing the number of block nonzeros in the various block rows 1841 of the in diagonal portion of the local (possibly different for each block 1842 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 1843 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1844 submatrix (same for all local rows). 1845 - o_nzz - array containing the number of nonzeros in the various block rows of the 1846 off-diagonal portion of the local submatrix (possibly different for 1847 each block row) or PETSC_NULL. 1848 1849 Output Parameter: 1850 . A - the matrix 1851 1852 Options Database Keys: 1853 . -mat_no_unroll - uses code that does not unroll the loops in the 1854 block calculations (much slower) 1855 . -mat_block_size - size of the blocks to use 1856 1857 Notes: 1858 The user MUST specify either the local or global matrix dimensions 1859 (possibly both). 1860 1861 Storage Information: 1862 For a square global matrix we define each processor's diagonal portion 1863 to be its local rows and the corresponding columns (a square submatrix); 1864 each processor's off-diagonal portion encompasses the remainder of the 1865 local matrix (a rectangular submatrix). 1866 1867 The user can specify preallocated storage for the diagonal part of 1868 the local submatrix with either d_nz or d_nnz (not both). Set 1869 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1870 memory allocation. Likewise, specify preallocated storage for the 1871 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1872 1873 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1874 the figure below we depict these three local rows and all columns (0-11). 1875 1876 .vb 1877 0 1 2 3 4 5 6 7 8 9 10 11 1878 ------------------- 1879 row 3 | o o o d d d o o o o o o 1880 row 4 | o o o d d d o o o o o o 1881 row 5 | o o o d d d o o o o o o 1882 ------------------- 1883 .ve 1884 1885 Thus, any entries in the d locations are stored in the d (diagonal) 1886 submatrix, and any entries in the o locations are stored in the 1887 o (off-diagonal) submatrix. Note that the d and the o submatrices are 1888 stored simply in the MATSEQBAIJ format for compressed row storage. 1889 1890 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 1891 and o_nz should indicate the number of block nonzeros per row in the o matrix. 1892 In general, for PDE problems in which most nonzeros are near the diagonal, 1893 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1894 or you will get TERRIBLE performance; see the users' manual chapter on 1895 matrices. 1896 1897 .keywords: matrix, block, aij, compressed row, sparse, parallel 1898 1899 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIAIJ() 1900 @*/ 1901 int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N, 1902 int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 1903 { 1904 Mat B; 1905 Mat_MPIBAIJ *b; 1906 int ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size,flg; 1907 1908 PetscFunctionBegin; 1909 if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid block size specified, must be positive"); 1910 1911 MPI_Comm_size(comm,&size); 1912 if (size == 1) { 1913 if (M == PETSC_DECIDE) M = m; 1914 if (N == PETSC_DECIDE) N = n; 1915 ierr = MatCreateSeqBAIJ(comm,bs,M,N,d_nz,d_nnz,A); CHKERRQ(ierr); 1916 PetscFunctionReturn(0); 1917 } 1918 1919 *A = 0; 1920 PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,comm,MatDestroy,MatView); 1921 PLogObjectCreate(B); 1922 B->data = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b); 1923 PetscMemzero(b,sizeof(Mat_MPIBAIJ)); 1924 PetscMemcpy(B->ops,&MatOps,sizeof(struct _MatOps)); 1925 1926 B->ops->destroy = MatDestroy_MPIBAIJ; 1927 B->ops->view = MatView_MPIBAIJ; 1928 B->mapping = 0; 1929 B->factor = 0; 1930 B->assembled = PETSC_FALSE; 1931 1932 B->insertmode = NOT_SET_VALUES; 1933 MPI_Comm_rank(comm,&b->rank); 1934 MPI_Comm_size(comm,&b->size); 1935 1936 if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) { 1937 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 1938 } 1939 if ( M == PETSC_DECIDE && m == PETSC_DECIDE) { 1940 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either M or m should be specified"); 1941 } 1942 if ( N == PETSC_DECIDE && n == PETSC_DECIDE) { 1943 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either N or n should be specified"); 1944 } 1945 if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE; 1946 if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE; 1947 1948 if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 1949 work[0] = m; work[1] = n; 1950 mbs = m/bs; nbs = n/bs; 1951 ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr); 1952 if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;} 1953 if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;} 1954 } 1955 if (m == PETSC_DECIDE) { 1956 Mbs = M/bs; 1957 if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global rows must be divisible by blocksize"); 1958 mbs = Mbs/b->size + ((Mbs % b->size) > b->rank); 1959 m = mbs*bs; 1960 } 1961 if (n == PETSC_DECIDE) { 1962 Nbs = N/bs; 1963 if (Nbs*bs != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global cols must be divisible by blocksize"); 1964 nbs = Nbs/b->size + ((Nbs % b->size) > b->rank); 1965 n = nbs*bs; 1966 } 1967 if (mbs*bs != m || nbs*bs != n) { 1968 SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of local rows, cols must be divisible by blocksize"); 1969 } 1970 1971 b->m = m; B->m = m; 1972 b->n = n; B->n = n; 1973 b->N = N; B->N = N; 1974 b->M = M; B->M = M; 1975 b->bs = bs; 1976 b->bs2 = bs*bs; 1977 b->mbs = mbs; 1978 b->nbs = nbs; 1979 b->Mbs = Mbs; 1980 b->Nbs = Nbs; 1981 1982 /* build local table of row and column ownerships */ 1983 b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners); 1984 PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 1985 b->cowners = b->rowners + b->size + 2; 1986 ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1987 b->rowners[0] = 0; 1988 for ( i=2; i<=b->size; i++ ) { 1989 b->rowners[i] += b->rowners[i-1]; 1990 } 1991 b->rstart = b->rowners[b->rank]; 1992 b->rend = b->rowners[b->rank+1]; 1993 b->rstart_bs = b->rstart * bs; 1994 b->rend_bs = b->rend * bs; 1995 1996 ierr = MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1997 b->cowners[0] = 0; 1998 for ( i=2; i<=b->size; i++ ) { 1999 b->cowners[i] += b->cowners[i-1]; 2000 } 2001 b->cstart = b->cowners[b->rank]; 2002 b->cend = b->cowners[b->rank+1]; 2003 b->cstart_bs = b->cstart * bs; 2004 b->cend_bs = b->cend * bs; 2005 2006 2007 if (d_nz == PETSC_DEFAULT) d_nz = 5; 2008 ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr); 2009 PLogObjectParent(B,b->A); 2010 if (o_nz == PETSC_DEFAULT) o_nz = 0; 2011 ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr); 2012 PLogObjectParent(B,b->B); 2013 2014 /* build cache for off array entries formed */ 2015 ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr); 2016 b->donotstash = 0; 2017 b->colmap = 0; 2018 b->garray = 0; 2019 b->roworiented = 1; 2020 2021 /* stuff used in block assembly */ 2022 b->barray = 0; 2023 2024 /* stuff used for matrix vector multiply */ 2025 b->lvec = 0; 2026 b->Mvctx = 0; 2027 2028 /* stuff for MatGetRow() */ 2029 b->rowindices = 0; 2030 b->rowvalues = 0; 2031 b->getrowactive = PETSC_FALSE; 2032 2033 /* hash table stuff */ 2034 b->ht = 0; 2035 b->hd = 0; 2036 b->ht_size = 0; 2037 b->ht_flag = 0; 2038 b->ht_fact = 0; 2039 b->ht_total_ct = 0; 2040 b->ht_insert_ct = 0; 2041 2042 *A = B; 2043 ierr = OptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg); CHKERRQ(ierr); 2044 if (flg) { 2045 double fact = 1.39; 2046 ierr = MatSetOption(B,MAT_USE_HASH_TABLE); CHKERRQ(ierr); 2047 ierr = OptionsGetDouble(PETSC_NULL,"-mat_use_hash_table",&fact,&flg); CHKERRQ(ierr); 2048 if (fact <= 1.0) fact = 1.39; 2049 ierr = MatMPIBAIJSetHashTableFactor(B,fact); CHKERRQ(ierr); 2050 PLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact); 2051 } 2052 PetscFunctionReturn(0); 2053 } 2054 2055 #undef __FUNC__ 2056 #define __FUNC__ "MatConvertSameType_MPIBAIJ" 2057 static int MatConvertSameType_MPIBAIJ(Mat matin,Mat *newmat,int cpvalues) 2058 { 2059 Mat mat; 2060 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data; 2061 int ierr, len=0, flg; 2062 2063 PetscFunctionBegin; 2064 *newmat = 0; 2065 PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,matin->comm,MatDestroy,MatView); 2066 PLogObjectCreate(mat); 2067 mat->data = (void *) (a = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(a); 2068 PetscMemcpy(mat->ops,&MatOps,sizeof(struct _MatOps)); 2069 mat->ops->destroy = MatDestroy_MPIBAIJ; 2070 mat->ops->view = MatView_MPIBAIJ; 2071 mat->factor = matin->factor; 2072 mat->assembled = PETSC_TRUE; 2073 2074 a->m = mat->m = oldmat->m; 2075 a->n = mat->n = oldmat->n; 2076 a->M = mat->M = oldmat->M; 2077 a->N = mat->N = oldmat->N; 2078 2079 a->bs = oldmat->bs; 2080 a->bs2 = oldmat->bs2; 2081 a->mbs = oldmat->mbs; 2082 a->nbs = oldmat->nbs; 2083 a->Mbs = oldmat->Mbs; 2084 a->Nbs = oldmat->Nbs; 2085 2086 a->rstart = oldmat->rstart; 2087 a->rend = oldmat->rend; 2088 a->cstart = oldmat->cstart; 2089 a->cend = oldmat->cend; 2090 a->size = oldmat->size; 2091 a->rank = oldmat->rank; 2092 mat->insertmode = NOT_SET_VALUES; 2093 a->rowvalues = 0; 2094 a->getrowactive = PETSC_FALSE; 2095 a->barray = 0; 2096 2097 /* hash table stuff */ 2098 a->ht = 0; 2099 a->hd = 0; 2100 a->ht_size = 0; 2101 a->ht_flag = oldmat->ht_flag; 2102 a->ht_fact = oldmat->ht_fact; 2103 a->ht_total_ct = 0; 2104 a->ht_insert_ct = 0; 2105 2106 2107 a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 2108 PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 2109 a->cowners = a->rowners + a->size + 2; 2110 PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int)); 2111 ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr); 2112 if (oldmat->colmap) { 2113 a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap); 2114 PLogObjectMemory(mat,(a->Nbs)*sizeof(int)); 2115 PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int)); 2116 } else a->colmap = 0; 2117 if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) { 2118 a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray); 2119 PLogObjectMemory(mat,len*sizeof(int)); 2120 PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); 2121 } else a->garray = 0; 2122 2123 ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 2124 PLogObjectParent(mat,a->lvec); 2125 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 2126 PLogObjectParent(mat,a->Mvctx); 2127 ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr); 2128 PLogObjectParent(mat,a->A); 2129 ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr); 2130 PLogObjectParent(mat,a->B); 2131 ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr); 2132 if (flg) { 2133 ierr = MatPrintHelp(mat); CHKERRQ(ierr); 2134 } 2135 *newmat = mat; 2136 PetscFunctionReturn(0); 2137 } 2138 2139 #include "sys.h" 2140 2141 #undef __FUNC__ 2142 #define __FUNC__ "MatLoad_MPIBAIJ" 2143 int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat) 2144 { 2145 Mat A; 2146 int i, nz, ierr, j,rstart, rend, fd; 2147 Scalar *vals,*buf; 2148 MPI_Comm comm = ((PetscObject)viewer)->comm; 2149 MPI_Status status; 2150 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 2151 int *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf; 2152 int flg,tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows; 2153 int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2154 int dcount,kmax,k,nzcount,tmp; 2155 2156 PetscFunctionBegin; 2157 ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr); 2158 2159 MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 2160 if (!rank) { 2161 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 2162 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr); 2163 if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object"); 2164 if (header[3] < 0) { 2165 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as MPIBAIJ"); 2166 } 2167 } 2168 2169 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 2170 M = header[1]; N = header[2]; 2171 2172 if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices"); 2173 2174 /* 2175 This code adds extra rows to make sure the number of rows is 2176 divisible by the blocksize 2177 */ 2178 Mbs = M/bs; 2179 extra_rows = bs - M + bs*(Mbs); 2180 if (extra_rows == bs) extra_rows = 0; 2181 else Mbs++; 2182 if (extra_rows &&!rank) { 2183 PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n"); 2184 } 2185 2186 /* determine ownership of all rows */ 2187 mbs = Mbs/size + ((Mbs % size) > rank); 2188 m = mbs * bs; 2189 rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int)); CHKPTRQ(rowners); 2190 browners = rowners + size + 1; 2191 ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2192 rowners[0] = 0; 2193 for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1]; 2194 for ( i=0; i<=size; i++ ) browners[i] = rowners[i]*bs; 2195 rstart = rowners[rank]; 2196 rend = rowners[rank+1]; 2197 2198 /* distribute row lengths to all processors */ 2199 locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) ); CHKPTRQ(locrowlens); 2200 if (!rank) { 2201 rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) ); CHKPTRQ(rowlengths); 2202 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr); 2203 for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 2204 sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 2205 for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i]; 2206 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2207 PetscFree(sndcounts); 2208 } else { 2209 ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm);CHKERRQ(ierr); 2210 } 2211 2212 if (!rank) { 2213 /* calculate the number of nonzeros on each processor */ 2214 procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 2215 PetscMemzero(procsnz,size*sizeof(int)); 2216 for ( i=0; i<size; i++ ) { 2217 for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) { 2218 procsnz[i] += rowlengths[j]; 2219 } 2220 } 2221 PetscFree(rowlengths); 2222 2223 /* determine max buffer needed and allocate it */ 2224 maxnz = 0; 2225 for ( i=0; i<size; i++ ) { 2226 maxnz = PetscMax(maxnz,procsnz[i]); 2227 } 2228 cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 2229 2230 /* read in my part of the matrix column indices */ 2231 nz = procsnz[0]; 2232 ibuf = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf); 2233 mycols = ibuf; 2234 if (size == 1) nz -= extra_rows; 2235 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr); 2236 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2237 2238 /* read in every ones (except the last) and ship off */ 2239 for ( i=1; i<size-1; i++ ) { 2240 nz = procsnz[i]; 2241 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr); 2242 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 2243 } 2244 /* read in the stuff for the last proc */ 2245 if ( size != 1 ) { 2246 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2247 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr); 2248 for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i; 2249 ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr); 2250 } 2251 PetscFree(cols); 2252 } else { 2253 /* determine buffer space needed for message */ 2254 nz = 0; 2255 for ( i=0; i<m; i++ ) { 2256 nz += locrowlens[i]; 2257 } 2258 ibuf = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf); 2259 mycols = ibuf; 2260 /* receive message of column indices*/ 2261 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2262 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2263 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 2264 } 2265 2266 /* loop over local rows, determining number of off diagonal entries */ 2267 dlens = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) ); CHKPTRQ(dlens); 2268 odlens = dlens + (rend-rstart); 2269 mask = (int *) PetscMalloc( 3*Mbs*sizeof(int) ); CHKPTRQ(mask); 2270 PetscMemzero(mask,3*Mbs*sizeof(int)); 2271 masked1 = mask + Mbs; 2272 masked2 = masked1 + Mbs; 2273 rowcount = 0; nzcount = 0; 2274 for ( i=0; i<mbs; i++ ) { 2275 dcount = 0; 2276 odcount = 0; 2277 for ( j=0; j<bs; j++ ) { 2278 kmax = locrowlens[rowcount]; 2279 for ( k=0; k<kmax; k++ ) { 2280 tmp = mycols[nzcount++]/bs; 2281 if (!mask[tmp]) { 2282 mask[tmp] = 1; 2283 if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp; 2284 else masked1[dcount++] = tmp; 2285 } 2286 } 2287 rowcount++; 2288 } 2289 2290 dlens[i] = dcount; 2291 odlens[i] = odcount; 2292 2293 /* zero out the mask elements we set */ 2294 for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0; 2295 for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0; 2296 } 2297 2298 /* create our matrix */ 2299 ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat); 2300 CHKERRQ(ierr); 2301 A = *newmat; 2302 MatSetOption(A,MAT_COLUMNS_SORTED); 2303 2304 if (!rank) { 2305 buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(buf); 2306 /* read in my part of the matrix numerical values */ 2307 nz = procsnz[0]; 2308 vals = buf; 2309 mycols = ibuf; 2310 if (size == 1) nz -= extra_rows; 2311 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 2312 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2313 2314 /* insert into matrix */ 2315 jj = rstart*bs; 2316 for ( i=0; i<m; i++ ) { 2317 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2318 mycols += locrowlens[i]; 2319 vals += locrowlens[i]; 2320 jj++; 2321 } 2322 /* read in other processors (except the last one) and ship out */ 2323 for ( i=1; i<size-1; i++ ) { 2324 nz = procsnz[i]; 2325 vals = buf; 2326 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 2327 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2328 } 2329 /* the last proc */ 2330 if ( size != 1 ){ 2331 nz = procsnz[i] - extra_rows; 2332 vals = buf; 2333 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 2334 for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0; 2335 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 2336 } 2337 PetscFree(procsnz); 2338 } else { 2339 /* receive numeric values */ 2340 buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(buf); 2341 2342 /* receive message of values*/ 2343 vals = buf; 2344 mycols = ibuf; 2345 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2346 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2347 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 2348 2349 /* insert into matrix */ 2350 jj = rstart*bs; 2351 for ( i=0; i<m; i++ ) { 2352 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2353 mycols += locrowlens[i]; 2354 vals += locrowlens[i]; 2355 jj++; 2356 } 2357 } 2358 PetscFree(locrowlens); 2359 PetscFree(buf); 2360 PetscFree(ibuf); 2361 PetscFree(rowners); 2362 PetscFree(dlens); 2363 PetscFree(mask); 2364 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 2365 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 2366 PetscFunctionReturn(0); 2367 } 2368 2369 2370 2371 #undef __FUNC__ 2372 #define __FUNC__ "MatMPIBAIJSetHashTableFactor" 2373 /*@ 2374 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2375 2376 Input Parameters: 2377 . mat - the matrix 2378 . fact - factor 2379 2380 Collective on Mat 2381 2382 Notes: 2383 This can also be set by the command line option: -mat_use_hash_table fact 2384 2385 .keywords: matrix, hashtable, factor, HT 2386 2387 .seealso: MatSetOption() 2388 @*/ 2389 int MatMPIBAIJSetHashTableFactor(Mat mat,double fact) 2390 { 2391 Mat_MPIBAIJ *baij; 2392 2393 PetscFunctionBegin; 2394 PetscValidHeaderSpecific(mat,MAT_COOKIE); 2395 if (mat->type != MATMPIBAIJ) { 2396 SETERRQ(PETSC_ERR_ARG_WRONG,1,"Incorrect matrix type. Use MPIBAIJ only."); 2397 } 2398 baij = (Mat_MPIBAIJ*) mat->data; 2399 baij->ht_fact = fact; 2400 PetscFunctionReturn(0); 2401 } 2402