#ifdef PETSC_RCS_HEADER
static char vcid[] = "$Id: mpibaij.c,v 1.158 1999/03/11 22:30:18 balay Exp balay $";
#endif

#include "src/mat/impls/baij/mpi/mpibaij.h" /*I "mat.h" I*/
#include "src/vec/vecimpl.h"

/* Routines implemented in other compilation units (SeqBAIJ / MPIBAIJ support) */
extern int MatSetUpMultiply_MPIBAIJ(Mat);
extern int DisAssemble_MPIBAIJ(Mat);
extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int);
extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **);
extern int MatGetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *);
extern int MatSetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *,InsertMode);
extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
extern int MatPrintHelp_SeqBAIJ(Mat);

/*
   Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix. This is done in a non scalable way since the
   length of colmap equals the global matrix length.
*/
#undef __FUNC__
#define __FUNC__ "CreateColmap_MPIBAIJ_Private"
static int CreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  Mat_SeqBAIJ *B    = (Mat_SeqBAIJ*) baij->B->data;
  int         nbs = B->nbs,i,bs=B->bs;
#if defined (USE_CTABLE)
  int         ierr;
#endif

  PetscFunctionBegin;
#if defined (USE_CTABLE)
  /* Hash-table variant: maps garray[i]+1 -> i*bs+1; both are shifted by
     one so that a value of 0 can mean "column not present".
     NOTE(review): the table is sized from baij->nbs while the loop runs
     over B->nbs -- confirm these are intended to refer to the same count. */
  ierr = TableCreate(baij->nbs/5,&baij->colmap); CHKERRQ(ierr);
  for ( i=0; i<nbs; i++ ){
    ierr = TableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
  }
#else
  /* Dense variant: one int per global block column; 0 means "column not
     in the off-diagonal part", otherwise local offset + 1. */
  baij->colmap = (int *) PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap);
  PLogObjectMemory(mat,baij->Nbs*sizeof(int));
  PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));
  for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}

/* Number of extra block entries allocated each time a block row overflows */
#define CHUNKSIZE 10

/*
   Inserts or adds a single scalar entry into the "A" (diagonal-block)
   SeqBAIJ part of the local matrix, enlarging the row's storage by
   CHUNKSIZE block entries when it is full.  Relies on the caller having
   the SeqBAIJ work variables (rp,ap,bap,rmax,nrow,low,high,t,_i,ii,N,
   brow,bcol,ridx,cidx,a,aa,ai,aj,aimax,ailen,bs,bs2,A) in scope, as
   MatSetValues_MPIBAIJ below does.  a->nonew semantics: 1 = silently
   ignore new nonzeros, -1/-2 = error on new nonzero.
*/
#define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    /* binary search narrows the range, final probe is linear */ \
    low = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for ( _i=low; _i<high; _i++ ) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        /* block column already present: write into the existing block */ \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else                    *bap  = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
      Scalar *new_a; \
 \
      if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
 \
      /* malloc new storage space */ \
      /* single allocation holds values, column indices, and row pointers */ \
      len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); \
      new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
      new_j   = (int *) (new_a + bs2*new_nz); \
      new_i   = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} \
      for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
      PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); \
      len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
      PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, \
                  len*sizeof(int)); \
      PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); \
      PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \
      PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
                  aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar)); \
      /* free up old matrix storage */ \
      PetscFree(a->a); \
      if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
      aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \
      a->singlemalloc = 1; \
 \
      rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
      rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
      PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
      a->maxnz += bs2*CHUNKSIZE; \
      a->reallocs++; \
      /* NOTE(review): a->nz is incremented only on the enlarge path, not
         on every new block insertion -- confirm against how nz is used. */ \
      a->nz++; \
    } \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    for ( ii=N; ii>=_i; ii-- ) { \
      rp[ii+1] = rp[ii]; \
      PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \
    } \
    if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar)); \
    rp[_i]                      = bcol; \
    ap[bs2*_i + bs*cidx + ridx] = value; \
    a_noinsert:; \
    ailen[brow] = nrow; \
}

/*
   Same insertion logic as MatSetValues_SeqBAIJ_A_Private, but targets the
   "B" (off-diagonal-block) SeqBAIJ part of the local matrix.  Uses the
   b,ba,bi,bj,bimax,bilen aliases that MatSetValues_MPIBAIJ sets up.
*/
#define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for ( _i=low; _i<high; _i++ ) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else                    *bap  = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int    new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
      Scalar *new_a; \
 \
      if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
 \
      /* malloc new storage space */ \
      len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(b->mbs+1)*sizeof(int); \
      new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
      new_j   = (int *) (new_a + bs2*new_nz); \
      new_i   = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = bi[ii];} \
      for ( ii=brow+1; ii<b->mbs+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
      PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int)); \
      len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
      PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow, \
                  len*sizeof(int)); \
      PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(Scalar)); \
      PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \
      PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
                  ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(Scalar)); \
      /* free up old matrix storage */ \
      PetscFree(b->a); \
      if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \
      ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \
      b->singlemalloc = 1; \
 \
      rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
      rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
      PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
      b->maxnz += bs2*CHUNKSIZE; \
      b->reallocs++; \
      b->nz++; \
    } \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    for ( ii=N; ii>=_i; ii-- ) { \
      rp[ii+1] = rp[ii]; \
      PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \
    } \
    if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar)); \
    rp[_i]                      = bcol; \
    ap[bs2*_i + bs*cidx + ridx] = value; \
    b_noinsert:; \
    bilen[brow] = nrow; \
}

/*
   Scalar-indexed MatSetValues for the parallel BAIJ matrix: entries for
   locally owned rows go straight into the local A (diagonal) or B
   (off-diagonal) part via the macros above; entries for other
   processors' rows are stashed for communication at assembly time.
   Negative row/column indices are silently skipped.
*/
#undef __FUNC__
#define __FUNC__ "MatSetValues_MPIBAIJ"
int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  Scalar      value;
  int         ierr,i,j,row,col;
  int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
  int         rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
  int         cend_orig=baij->cend_bs,bs=baij->bs;

  /* Some Variables required in the macro */
  Mat         A = baij->A;
  Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) (A)->data;
  int         *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  Scalar      *aa=a->a;

  Mat         B = baij->B;
  Mat_SeqBAIJ *b = (Mat_SeqBAIJ *) (B)->data;
  int         *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  Scalar      *ba=b->a;

  int         *rp,ii,nrow,_i,rmax,N,brow,bcol;
  int         low,high,t,ridx,cidx,bs2=a->bs2;
  Scalar      *ap,*bap;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (im[i] < 0) continue;
#if defined(USE_PETSC_BOPT_g)
    if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      /* row is owned locally */
      row = im[i] - rstart_orig;
      for ( j=0; j<n; j++ ) {
        if (in[j] >= cstart_orig && in[j] < cend_orig){
          /* column falls in the diagonal block */
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
        else if (in[j] < 0) continue;
#if defined(USE_PETSC_BOPT_g)
        else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Col too large");}
#endif
        else {
          /* column falls in the off-diagonal block */
          if (mat->was_assembled) {
            /* after assembly, B uses compacted local column numbering;
               translate global -> local via colmap */
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (USE_CTABLE)
            ierr = TableFind(baij->colmap, in[j]/bs + 1,&col); CHKERRQ(ierr);
            col  = col - 1 + in[j]%bs;
#else
            col = baij->colmap[in[j]/bs] - 1 + in[j]%bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              /* column not previously present: fall back to global
                 numbering by disassembling B */
              ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ *) (B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba=b->a;
            }
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      /* row owned by another processor: stash for assembly-time exchange */
      if (roworiented && !baij->donotstash) {
        ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
      } else {
        if (!baij->donotstash) {
          row = im[i];
          for ( j=0; j<n; j++ ) {
            ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m);CHKERRQ(ierr);
          }
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

/*
   Block-indexed MatSetValues: im[]/in[] are BLOCK row/column indices and
   v holds m*n dense bs-by-bs blocks.  Locally owned blocks are passed to
   MatSetValuesBlocked_SeqBAIJ on A or B; off-processor blocks are
   stashed.  barray is a persistent bs2 scratch block used to reorder v
   when a direct pointer into v cannot be used.
*/
#undef __FUNC__
#define __FUNC__ "MatSetValuesBlocked_MPIBAIJ"
int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  Scalar      *value,*barray=baij->barray;
  int         ierr,i,j,ii,jj,row,col;
  int         roworiented = baij->roworiented,rstart=baij->rstart ;
  int         rend=baij->rend,cstart=baij->cstart,stepval;
  int         cend=baij->cend,bs=baij->bs,bs2=baij->bs2;

  /* NOTE(review): unlike the other functions in this file there is no
     PetscFunctionBegin here -- confirm whether that is intentional. */
  if(!barray) {
    baij->barray = barray = (Scalar*) PetscMalloc(bs2*sizeof(Scalar)); CHKPTRQ(barray);
  }

  /* distance (in scalars) between consecutive rows/cols of one block in v */
  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for ( i=0; i<m; i++ ) {
    if (im[i] < 0) continue;
#if defined(USE_PETSC_BOPT_g)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large, row %d max %d",im[i],baij->Mbs);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for ( j=0; j<n; j++ ) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = v + i*bs2;
        } else if((!roworiented) && (m == 1)) {
          barray = v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for ( ii=0; ii<bs; ii++,value+=stepval ) {
            for (jj=0; jj<bs; jj++ ) {
              *barray++ = *value++;
            }
          }
          barray -=bs2;
        }

        if (in[j] >= cstart && in[j] < cend){
          /* diagonal block part */
          col = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;
#if defined(USE_PETSC_BOPT_g)
        else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large, col %d max %d",in[j],baij->Nbs);}
#endif
        else {
          /* off-diagonal block part: translate global block column to
             B's local numbering once the matrix has been assembled */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(USE_PETSC_BOPT_g)
#if defined (USE_CTABLE)
            { int data;
              ierr = TableFind(baij->colmap,in[j]+1,&data); CHKERRQ(ierr);
              if((data - 1) % bs) {SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");}
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) {SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");}
#endif
#endif
#if defined (USE_CTABLE)
            ierr = TableFind(baij->colmap,in[j]+1,&col); CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
              col = in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      /* off-processor block row: stash the whole row of blocks */
      if (!baij->donotstash) {
        /* ierr = StashValuesBlocked_Private(&baij->bstash,im[i],n,in,v+i*bs2*n,
           roworiented,stepval,addv);CHKERRQ(ierr); */
        ierr = StashValuesBlocked_Private(&baij->bstash,im[i],n,in,v,m,n,
                                          roworiented,i);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}

/* Multiplicative hashing constant (golden ratio fraction) and hash macro;
   tmp is a caller-supplied double scratch used to avoid fmod(). */
#define HASH_KEY 0.6180339887
/* #define HASH1(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
/* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */

/*
   Scalar-indexed MatSetValues using the precomputed hash table (set up by
   MatCreateHashTable_MPIBAIJ_Private once the nonzero pattern is fixed):
   each (block row, block col) key maps to the address of its bs2 block.
   New nonzeros cannot be introduced through this path.
*/
#undef __FUNC__
#define __FUNC__ "MatSetValues_MPIBAIJ_HT"
int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr,i,j,row,col;
  int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
  int         rend_orig=baij->rend_bs,Nbs=baij->Nbs;
  int         h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx;
  double      tmp;
  Scalar      **HD = baij->hd,value;
#if defined(USE_PETSC_BOPT_g)
  int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  for ( i=0; i<m; i++ ) {
#if defined(USE_PETSC_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for ( j=0; j<n; j++ ) {
        col = in[j];
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        /* Look up into the Hash Table */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);


        idx = h1;
#if defined(USE_PETSC_BOPT_g)
        /* debug build also tracks probe counts for the summary log */
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          /* linear probe forward from h1, wrapping once to the start */
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#else
        if (HT[idx] != key) {
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#endif
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else {
      /* off-processor row: stash for assembly-time communication */
      if (roworiented && !baij->donotstash) {
        ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
      } else {
        if (!baij->donotstash) {
          row = im[i];
          for ( j=0; j<n; j++ ) {
            ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m);CHKERRQ(ierr);
          }
        }
      }
    }
  }
#if defined(USE_PETSC_BOPT_g)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

/*
   Block-indexed MatSetValues via the hash table: im[]/in[] are block
   indices, v holds m*n dense bs-by-bs blocks.  The looked-up HD[idx]
   points at the bs2 scalars of the target block, which are filled in
   column-major order.  Off-processor blocks are stashed entry-by-entry
   into the scalar stash.
*/
#undef __FUNC__
#define __FUNC__ "MatSetValuesBlocked_MPIBAIJ_HT"
int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr,i,j,ii,jj,row,col,k,l;
  int         roworiented = baij->roworiented,rstart=baij->rstart ;
  int         rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2;
  int         h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  double      tmp;
  Scalar      **HD = baij->hd,*value,*v_t,*baij_a;
#if defined(USE_PETSC_BOPT_g)
  int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for ( i=0; i<m; i++ ) {
#if defined(USE_PETSC_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    row = im[i];
    v_t = v + i*bs2;
    if (row >= rstart && row < rend) {
      for ( j=0; j<n; j++ ) {
        col = in[j];

        /* Look up into the Hash Table */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(USE_PETSC_BOPT_g)
        total_ct++;
        insert_ct++;
        if (HT[idx] != key) {
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#else
        if (HT[idx] != key) {
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#endif
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          if (addv == ADD_VALUES) {
            for ( ii=0; ii<bs; ii++,value+=stepval) {
              for ( jj=ii; jj<bs2; jj+=bs ) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for ( ii=0; ii<bs; ii++,value+=stepval) {
              for ( jj=ii; jj<bs2; jj+=bs ) {
                baij_a[jj] = *value++;
              }
            }
          }
        } else {
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) {
              for ( jj=0; jj<bs; jj++ ) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) {
              for ( jj=0; jj<bs; jj++ ) {
                baij_a[jj] = *value++;
              }
            }
          }
        }
      }
    } else {
      /* off-processor block row: expand blocks into scalar entries and
         stash them one at a time */
      if (!baij->donotstash) {
        if (roworiented ) {
          row = im[i]*bs;
          value = v + i*(stepval+bs)*bs;
          for ( j=0; j<bs; j++,row++ ) {
            for ( k=0; k<n; k++ ) {
              for ( col=in[k]*bs,l=0; l<bs; l++,col++) {
                ierr = StashValues_Private(&baij->stash,row,1,&col,value++);CHKERRQ(ierr);
              }
            }
          }
        } else {
          for ( j=0; j<n; j++ ) {
            value = v + j*(stepval+bs)*bs + i*bs;
            col = in[j]*bs;
            for ( k=0; k<bs; k++,col++,value+=stepval) {
              for ( row = im[i]*bs,l=0; l<bs; l++,row++) {
                ierr = StashValues_Private(&baij->stash,row,1,&col,value++);CHKERRQ(ierr);
              }
            }
          }
        }
      }
    }
  }
#if defined(USE_PETSC_BOPT_g)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

/*
   Retrieves entries (scalar indexing) from the locally owned portion of
   the matrix; requesting a non-local row is an error.  Entries in the
   off-diagonal part that are not stored return 0.0.
*/
#undef __FUNC__
#define __FUNC__ "MatGetValues_MPIBAIJ"
int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs;
  int         bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col,data;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
        if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
        if (idxn[j] >= bscstart && idxn[j] < bscend){
          /* diagonal block part */
          col = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (USE_CTABLE)
          ierr = TableFind(baij->colmap,idxn[j]/bs+1,&data); CHKERRQ(ierr);
          data --;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          /* colmap miss or stale entry means the value is a structural zero */
          if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

/*
   Computes the matrix norm; only NORM_FROBENIUS is supported in
   parallel (each process sums the squares of its local A and B entries,
   then MPI_Allreduce combines them).  On one process it delegates to
   the SeqBAIJ MatNorm.
*/
#undef __FUNC__
#define __FUNC__ "MatNorm_MPIBAIJ"
int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data;
  int         ierr, i,bs2=baij->bs2;
  double      sum = 0.0;
  Scalar      *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,norm); CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz*bs2; i++ ) {
#if defined(USE_PETSC_COMPLEX)
        sum += PetscReal(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz*bs2; i++ ) {
#if defined(USE_PETSC_COMPLEX)
        sum += PetscReal(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet");
    }
  }
  PetscFunctionReturn(0);
}


/*
  Creates the hash table, and sets the table
  This table is created only once.
  If new entries need to be added to the matrix
  then the hash table has to be destroyed and
  recreated.
*/
#undef __FUNC__
#define __FUNC__ "MatCreateHashTable_MPIBAIJ_Private"
int MatCreateHashTable_MPIBAIJ_Private(Mat mat,double factor)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  Mat         A = baij->A, B=baij->B;
  Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data, *b=(Mat_SeqBAIJ *)B->data;
  int         i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  int         size,bs2=baij->bs2,rstart=baij->rstart;
  int         cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  int         *HT,key;
  Scalar      **HD;
  double      tmp;
#if defined(USE_PETSC_BOPT_g)
  int         ct=0,max=0;
#endif

  PetscFunctionBegin;
  /* table size = factor * current local nonzero-block count */
  baij->ht_size=(int)(factor*nz);
  size = baij->ht_size;

  if (baij->ht) {
    PetscFunctionReturn(0);
  }

  /* Allocate Memory for Hash Table: one allocation holds the block-pointer
     array (hd) followed by the key array (ht) */
  baij->hd = (Scalar**)PetscMalloc((size)*(sizeof(int)+sizeof(Scalar*))+1); CHKPTRQ(baij->hd);
  baij->ht = (int*)(baij->hd + size);
  HD = baij->hd;
  HT = baij->ht;


  PetscMemzero(HD,size*(sizeof(int)+sizeof(Scalar*)));


  /* Loop Over A */
  for ( i=0; i<a->mbs; i++ ) {
    for ( j=ai[i]; j<ai[i+1]; j++ ) {
      row = i+rstart;
      col = aj[j]+cstart;

      key = row*Nbs + col + 1;
      h1  = HASH(size,key,tmp);
      for ( k=0; k<size; k++ ){
        /* NOTE(review): HT is int storage but is compared against the
           double literal 0.0 -- confirm this is merely stylistic. */
        if (HT[(h1+k)%size] == 0.0) {
          HT[(h1+k)%size] = key;
          HD[(h1+k)%size] = a->a + j*bs2;
          break;
#if defined(USE_PETSC_BOPT_g)
        } else {
          ct++;
#endif
        }
      }
#if defined(USE_PETSC_BOPT_g)
      if (k> max) max = k;
#endif
    }
  }
  /* Loop Over B */
  for ( i=0; i<b->mbs; i++ ) {
    for ( j=bi[i]; j<bi[i+1]; j++ ) {
      row = i+rstart;
      col = garray[bj[j]];
      key = row*Nbs + col + 1;
      h1  = HASH(size,key,tmp);
      for ( k=0; k<size; k++ ){
        if (HT[(h1+k)%size] == 0.0) {
          HT[(h1+k)%size] = key;
          HD[(h1+k)%size] = b->a + j*bs2;
          break;
#if defined(USE_PETSC_BOPT_g)
        } else {
          ct++;
#endif
        }
      }
#if defined(USE_PETSC_BOPT_g)
      if (k> max) max = k;
#endif
    }
  }

  /* Print Summary */
#if defined(USE_PETSC_BOPT_g)
  for ( i=0,j=0; i<size; i++)
    if (HT[i]) {j++;}
  PLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n",
           (j== 0)? 0.0:((double)(ct+j))/j,max);
#endif
  PetscFunctionReturn(0);
}

/*
   Begins assembly: verifies all processes agree on INSERT vs ADD mode
   (collective MPI_Allreduce) and starts scattering the stashed
   off-processor values.
   NOTE(review): the early return when donotstash is set skips the
   collective MPI_Allreduce -- if donotstash can differ across ranks this
   would hang; confirm it is set uniformly.
*/
#undef __FUNC__
#define __FUNC__ "MatAssemblyBegin_MPIBAIJ"
int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr;
  InsertMode  addv;

  PetscFunctionBegin;
  if (baij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = StashScatterBegin_Private(&baij->stash,baij->rowners); CHKERRQ(ierr);
  ierr = StashScatterBegin_Private(&baij->bstash,baij->rowners); CHKERRQ(ierr);

  /* Free cache space */
  PLogInfo(baij->A,"MatAssemblyBegin_MPIBAIJ:Number of off-processor values %d\n",baij->stash.n);
  PetscFunctionReturn(0);
}

/*
   Completes assembly: drains the scalar and block stashes (inserting the
   received values locally), assembles the A and B parts, handles
   disassembly agreement across processes, sets up the scatter for
   MatMult on first assembly, and optionally builds the hash table and
   switches the MatSetValues implementations to the _HT variants.
*/
#undef __FUNC__
#define __FUNC__ "MatAssemblyEnd_MPIBAIJ"
int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij=(Mat_MPIBAIJ *) mat->data;
  Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data;
  int         i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2;
  int         *row,*col,other_disassembled,r1,r2,r3;
  Scalar      *val;
  InsertMode  addv = mat->insertmode;

  PetscFunctionBegin;
  if (!baij->donotstash) {
    while (1) {
      ierr = StashScatterGetMesg_Private(&baij->stash,&n,&row,&col,&val,&flg); CHKERRQ(ierr);
      if (!flg) break;
      for ( i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv); CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = StashScatterEnd_Private(&baij->stash); CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = b->roworiented;
    baij->roworiented = 0;
    a->roworiented    = 0;
    b->roworiented    = 0;
    while (1) {
      ierr = StashScatterGetMesg_Private(&baij->bstash,&n,&row,&col,&val,&flg); CHKERRQ(ierr);
      if (!flg) break;

      for ( i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv); CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = StashScatterEnd_Private(&baij->bstash); CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    b->roworiented    = r3;
  }

  ierr = MatAssemblyBegin(baij->A,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode); CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselfs, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*) baij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat); CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(baij->B,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode); CHKERRQ(ierr);

#if defined(USE_PETSC_BOPT_g)
  /* report and reset hash-table probe statistics */
  if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
    PLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",
             ((double)baij->ht_total_ct)/baij->ht_insert_ct);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  /* once the pattern is final, optionally build the hash table and switch
     to the faster _HT MatSetValues implementations */
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact); CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;}
  PetscFunctionReturn(0);
}

/*
   Binary viewer output; only implemented for the single-process case,
   where it delegates to the local SeqBAIJ viewer.
*/
#undef __FUNC__
#define __FUNC__ "MatView_MPIBAIJ_Binary"
static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
  } else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
  PetscFunctionReturn(0);
}

/*
   ASCII/Draw/Socket viewer output.  Info formats print per-process
   statistics; otherwise the entire matrix is gathered onto process 0 as
   a temporary MPIBAIJ matrix (all rows on rank 0) and viewed there.
   All ranks must participate in the Draw case.
*/
#undef __FUNC__
#define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
static int MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr, format,bs = baij->bs, size = baij->size, rank = baij->rank;
  FILE        *fd;
  ViewerType  vtype;

  PetscFunctionBegin;
  ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
  if (PetscTypeCompare(vtype,ASCII_VIEWER)) {
    ierr = ViewerGetFormat(viewer,&format); CHKERRQ(ierr);
    if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
      MatInfo info;
      MPI_Comm_rank(mat->comm,&rank);
      ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      PetscSequentialPhaseBegin(mat->comm,1);
      fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
              rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
              baij->bs,(int)info.memory);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);
      fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);
      fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
      fflush(fd);
      PetscSequentialPhaseEnd(mat->comm,1);
      ierr = VecScatterView(baij->Mvctx,viewer); CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == VIEWER_FORMAT_ASCII_INFO) {
      PetscPrintf(mat->comm," block size is %d\n",bs);
      PetscFunctionReturn(0);
    }
  }

  if (PetscTypeCompare(vtype,DRAW_VIEWER)) {
    Draw       draw;
    PetscTruth isnull;
    ierr = ViewerDrawGetDraw(viewer,0,&draw); CHKERRQ(ierr);
    ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    int         M = baij->M, N = baij->N,*ai,*aj,col,i,j,k,*rvals;
    int         mbs = baij->mbs;
    Scalar      *a;

    if (!rank) {
      ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    }
    PLogObjectParent(mat,A);

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*) baij->A->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);

    for ( i=0; i<mbs; i++ ) {
      rvals[0] = bs*(baij->rstart + i);
      for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
      for ( j=ai[i]; j<ai[i+1]; j++ ) {
        col = (baij->cstart+aj[j])*bs;
        for (k=0; k<bs; k++ ) {
          /* one column of the bs-by-bs block at a time */
          ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*) baij->B->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for ( i=0; i<mbs; i++ ) {
      rvals[0] = bs*(baij->rstart + i);
      for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
      for ( j=ai[i]; j<ai[i+1]; j++ ) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++ ) {
          ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    PetscFree(rvals);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the Draw object
    */
    if (!rank || PetscTypeCompare(vtype,DRAW_VIEWER)) {
      ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
    }
    ierr = MatDestroy(A); CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}


"MatView_MPIBAIJ" 1034 int MatView_MPIBAIJ(Mat mat,Viewer viewer) 1035 { 1036 int ierr; 1037 ViewerType vtype; 1038 1039 PetscFunctionBegin; 1040 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 1041 if (PetscTypeCompare(vtype,ASCII_VIEWER) || PetscTypeCompare(vtype,DRAW_VIEWER) || 1042 PetscTypeCompare(vtype,SOCKET_VIEWER)) { 1043 ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer); CHKERRQ(ierr); 1044 } else if (PetscTypeCompare(vtype,BINARY_VIEWER)) { 1045 ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1046 } else { 1047 SETERRQ(1,1,"Viewer type not supported by PETSc object"); 1048 } 1049 PetscFunctionReturn(0); 1050 } 1051 1052 #undef __FUNC__ 1053 #define __FUNC__ "MatDestroy_MPIBAIJ" 1054 int MatDestroy_MPIBAIJ(Mat mat) 1055 { 1056 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1057 int ierr; 1058 1059 PetscFunctionBegin; 1060 if (--mat->refct > 0) PetscFunctionReturn(0); 1061 1062 if (mat->mapping) { 1063 ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr); 1064 } 1065 if (mat->bmapping) { 1066 ierr = ISLocalToGlobalMappingDestroy(mat->bmapping); CHKERRQ(ierr); 1067 } 1068 if (mat->rmap) { 1069 ierr = MapDestroy(mat->rmap);CHKERRQ(ierr); 1070 } 1071 if (mat->cmap) { 1072 ierr = MapDestroy(mat->cmap);CHKERRQ(ierr); 1073 } 1074 #if defined(USE_PETSC_LOG) 1075 PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",baij->M,baij->N); 1076 #endif 1077 1078 ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr); 1079 ierr = StashDestroy_Private(&baij->bstash); CHKERRQ(ierr); 1080 PetscFree(baij->rowners); 1081 ierr = MatDestroy(baij->A); CHKERRQ(ierr); 1082 ierr = MatDestroy(baij->B); CHKERRQ(ierr); 1083 #if defined (USE_CTABLE) 1084 if (baij->colmap) TableDelete(baij->colmap); 1085 #else 1086 if (baij->colmap) PetscFree(baij->colmap); 1087 #endif 1088 if (baij->garray) PetscFree(baij->garray); 1089 if (baij->lvec) VecDestroy(baij->lvec); 1090 if (baij->Mvctx) VecScatterDestroy(baij->Mvctx); 1091 if (baij->rowvalues) 
PetscFree(baij->rowvalues); 1092 if (baij->barray) PetscFree(baij->barray); 1093 if (baij->hd) PetscFree(baij->hd); 1094 PetscFree(baij); 1095 PLogObjectDestroy(mat); 1096 PetscHeaderDestroy(mat); 1097 PetscFunctionReturn(0); 1098 } 1099 1100 #undef __FUNC__ 1101 #define __FUNC__ "MatMult_MPIBAIJ" 1102 int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1103 { 1104 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1105 int ierr, nt; 1106 1107 PetscFunctionBegin; 1108 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1109 if (nt != a->n) { 1110 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx"); 1111 } 1112 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1113 if (nt != a->m) { 1114 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible parition of A and yy"); 1115 } 1116 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1117 ierr = (*a->A->ops->mult)(a->A,xx,yy); CHKERRQ(ierr); 1118 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1119 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 1120 ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1121 PetscFunctionReturn(0); 1122 } 1123 1124 #undef __FUNC__ 1125 #define __FUNC__ "MatMultAdd_MPIBAIJ" 1126 int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1127 { 1128 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1129 int ierr; 1130 1131 PetscFunctionBegin; 1132 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1133 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 1134 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1135 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 1136 PetscFunctionReturn(0); 1137 } 1138 1139 #undef __FUNC__ 1140 #define __FUNC__ "MatMultTrans_MPIBAIJ" 1141 int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy) 1142 { 1143 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1144 int ierr; 1145 1146 
PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtrans)(a->A,xx,yy); CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation*/
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultTransAdd_MPIBAIJ"
/*
   MatMultTransAdd_MPIBAIJ - z = y + A'*x.  Same communication pattern as
   MatMultTrans_MPIBAIJ: reverse-scatter the off-diagonal contribution into zz
   while the local transpose multiply-add runs.
*/
int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
  int         ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation*/
  /* but is not perhaps always true.
*/
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNC__
#define __FUNC__ "MatGetDiagonal_MPIBAIJ"
int MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
  int         ierr;

  PetscFunctionBegin;
  if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block");
  /* the diagonal entries all live in the local diagonal block */
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatScale_MPIBAIJ"
/* Scale the whole matrix by *aa: apply to both sequential blocks. */
int MatScale_MPIBAIJ(Scalar *aa,Mat A)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = MatScale(aa,a->A); CHKERRQ(ierr);
  ierr = MatScale(aa,a->B); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetSize_MPIBAIJ"
/* Report global dimensions; either output pointer may be PETSC_NULL. */
int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;

  PetscFunctionBegin;
  if (m) *m = mat->M;
  if (n) *n = mat->N;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetLocalSize_MPIBAIJ"
/* Report this process's local dimensions (in scalar rows/columns). */
int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;

  PetscFunctionBegin;
  *m = mat->m; *n = mat->n;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetOwnershipRange_MPIBAIJ"
/* Ownership range in scalar (not block) row indices: [rstart*bs, rend*bs). */
int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;

  PetscFunctionBegin;
  *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetRow_MPIBAIJ"
int
MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
{
  /* Return one global row: merge the row of the diagonal block (A) and the
     off-diagonal block (B) into column-sorted order.  Only rows owned by this
     process may be requested; idx and/or v may be PETSC_NULL. */
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
  Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
  int        bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB;
  int        nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs;
  int        *cmap, *idx_p,cstart = mat->cstart;

  PetscFunctionBegin;
  /* only one row may be "checked out" at a time (freed at next assembly) */
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data;
    int     max = 1,mbs = mat->mbs,tmp;
    for ( i=0; i<mbs; i++ ) {
      /* widest block row across A and B, in blocks */
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    /* single allocation holds the Scalar values followed by the int indices */
    mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar)));
    CHKPTRQ(mat->rowvalues);
    mat->rowindices = (int *) (mat->rowvalues + max*bs2);
  }

  if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,0,"Only local rows")
  lrow = row - brstart;

  /* request columns/values from the sub-blocks only if the caller wants them */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap  = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      /* imark = number of leading B entries whose global column precedes the
         diagonal block; entries are laid out as B-left, A, B-right */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for ( i=0; i<nzB; i++ ) {
          if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
        for ( i=imark; i<nzB; i++ ) v_p[nzA+i] =
vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* imark already computed while copying the values */
          for ( i=0; i<imark; i++ ) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for ( i=0; i<nzB; i++ ) {
            if (cmap[cworkB[i]/bs] < cstart)
              idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
            else break;
          }
          imark = i;
        }
        /* diagonal-block columns are offset by cstart blocks; B columns are
           translated back to global numbering through garray */
        for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart*bs + cworkA[i];
        for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatRestoreRow_MPIBAIJ"
/* Release the row checked out by MatGetRow_MPIBAIJ (workspace is reused). */
int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;

  PetscFunctionBegin;
  if (baij->getrowactive == PETSC_FALSE) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called");
  }
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetBlockSize_MPIBAIJ"
/* Report the block size bs. */
int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;

  PetscFunctionBegin;
  *bs = baij->bs;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatZeroEntries_MPIBAIJ"
/* Zero all stored entries (structure is kept) in both sequential blocks. */
int MatZeroEntries_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetInfo_MPIBAIJ"
int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType
flag,MatInfo *info) 1368 { 1369 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) matin->data; 1370 Mat A = a->A, B = a->B; 1371 int ierr; 1372 double isend[5], irecv[5]; 1373 1374 PetscFunctionBegin; 1375 info->block_size = (double)a->bs; 1376 ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr); 1377 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1378 isend[3] = info->memory; isend[4] = info->mallocs; 1379 ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr); 1380 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1381 isend[3] += info->memory; isend[4] += info->mallocs; 1382 if (flag == MAT_LOCAL) { 1383 info->nz_used = isend[0]; 1384 info->nz_allocated = isend[1]; 1385 info->nz_unneeded = isend[2]; 1386 info->memory = isend[3]; 1387 info->mallocs = isend[4]; 1388 } else if (flag == MAT_GLOBAL_MAX) { 1389 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr); 1390 info->nz_used = irecv[0]; 1391 info->nz_allocated = irecv[1]; 1392 info->nz_unneeded = irecv[2]; 1393 info->memory = irecv[3]; 1394 info->mallocs = irecv[4]; 1395 } else if (flag == MAT_GLOBAL_SUM) { 1396 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr); 1397 info->nz_used = irecv[0]; 1398 info->nz_allocated = irecv[1]; 1399 info->nz_unneeded = irecv[2]; 1400 info->memory = irecv[3]; 1401 info->mallocs = irecv[4]; 1402 } else { 1403 SETERRQ1(1,1,"Unknown MatInfoType argument %d",flag); 1404 } 1405 info->rows_global = (double)a->M; 1406 info->columns_global = (double)a->N; 1407 info->rows_local = (double)a->m; 1408 info->columns_local = (double)a->N; 1409 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1410 info->fill_ratio_needed = 0; 1411 info->factor_mallocs = 0; 1412 PetscFunctionReturn(0); 1413 } 1414 1415 #undef __FUNC__ 1416 #define __FUNC__ "MatSetOption_MPIBAIJ" 1417 int MatSetOption_MPIBAIJ(Mat A,MatOption op) 1418 { 1419 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 
1420 int ierr; 1421 1422 PetscFunctionBegin; 1423 if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 1424 op == MAT_YES_NEW_NONZERO_LOCATIONS || 1425 op == MAT_COLUMNS_UNSORTED || 1426 op == MAT_COLUMNS_SORTED || 1427 op == MAT_NEW_NONZERO_ALLOCATION_ERR || 1428 op == MAT_NEW_NONZERO_LOCATION_ERR) { 1429 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1430 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1431 } else if (op == MAT_ROW_ORIENTED) { 1432 a->roworiented = 1; 1433 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1434 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1435 } else if (op == MAT_ROWS_SORTED || 1436 op == MAT_ROWS_UNSORTED || 1437 op == MAT_SYMMETRIC || 1438 op == MAT_STRUCTURALLY_SYMMETRIC || 1439 op == MAT_YES_NEW_DIAGONALS || 1440 op == MAT_USE_HASH_TABLE) { 1441 PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 1442 } else if (op == MAT_COLUMN_ORIENTED) { 1443 a->roworiented = 0; 1444 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1445 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1446 } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 1447 a->donotstash = 1; 1448 } else if (op == MAT_NO_NEW_DIAGONALS) { 1449 SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 1450 } else if (op == MAT_USE_HASH_TABLE) { 1451 a->ht_flag = 1; 1452 } else { 1453 SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 1454 } 1455 PetscFunctionReturn(0); 1456 } 1457 1458 #undef __FUNC__ 1459 #define __FUNC__ "MatTranspose_MPIBAIJ(" 1460 int MatTranspose_MPIBAIJ(Mat A,Mat *matout) 1461 { 1462 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data; 1463 Mat_SeqBAIJ *Aloc; 1464 Mat B; 1465 int ierr,M=baij->M,N=baij->N,*ai,*aj,i,*rvals,j,k,col; 1466 int bs=baij->bs,mbs=baij->mbs; 1467 Scalar *a; 1468 1469 PetscFunctionBegin; 1470 if (matout == PETSC_NULL && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place"); 1471 ierr = MatCreateMPIBAIJ(A->comm,baij->bs,baij->n,baij->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B); 1472 CHKERRQ(ierr); 1473 1474 /* copy over the A part */ 1475 Aloc = (Mat_SeqBAIJ*) 
baij->A->data;
  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
  rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);

  for ( i=0; i<mbs; i++ ) {
    /* rvals = the bs consecutive global (scalar) row indices of block row i */
    rvals[0] = bs*(baij->rstart + i);
    for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
    for ( j=ai[i]; j<ai[i+1]; j++ ) {
      col = (baij->cstart+aj[j])*bs;
      for (k=0; k<bs; k++ ) {
        /* insert each bs-long block column as a row of B: this transposes */
        ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
        col++; a += bs;
      }
    }
  }
  /* copy over the B part (garray maps compacted block columns to global) */
  Aloc = (Mat_SeqBAIJ*) baij->B->data;
  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
  for ( i=0; i<mbs; i++ ) {
    rvals[0] = bs*(baij->rstart + i);
    for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
    for ( j=ai[i]; j<ai[i+1]; j++ ) {
      col = baij->garray[aj[j]]*bs;
      for (k=0; k<bs; k++ ) {
        ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
        col++; a += bs;
      }
    }
  }
  PetscFree(rvals);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);

  if (matout != PETSC_NULL) {
    *matout = B;
  } else {
    PetscOps *Abops;
    MatOps   Aops;

    /* This isn't really an in-place transpose .... but free data structures from baij */
    PetscFree(baij->rowners);
    ierr = MatDestroy(baij->A); CHKERRQ(ierr);
    ierr = MatDestroy(baij->B); CHKERRQ(ierr);
#if defined (USE_CTABLE)
    if (baij->colmap) TableDelete(baij->colmap);
#else
    if (baij->colmap) PetscFree(baij->colmap);
#endif
    if (baij->garray) PetscFree(baij->garray);
    if (baij->lvec) VecDestroy(baij->lvec);
    if (baij->Mvctx) VecScatterDestroy(baij->Mvctx);
    PetscFree(baij);

    /*
       This is horrible, horrible code. We need to keep the
       A pointers for the bops and ops but copy everything
       else from C.
*/
    Abops = A->bops;
    Aops  = A->ops;
    /* overwrite A's header with B's, then restore A's original ops tables */
    PetscMemcpy(A,B,sizeof(struct _p_Mat));
    A->bops = Abops;
    A->ops  = Aops;

    PetscHeaderDestroy(B);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatDiagonalScale_MPIBAIJ"
/*
   MatDiagonalScale_MPIBAIJ - Scale rows by ll (applied to both blocks).
   Right (column) scaling is not supported for this format.
*/
int MatDiagonalScale_MPIBAIJ(Mat A,Vec ll,Vec rr)
{
  Mat a = ((Mat_MPIBAIJ *) A->data)->A;
  Mat b = ((Mat_MPIBAIJ *) A->data)->B;
  int ierr,s1,s2,s3;

  PetscFunctionBegin;
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1); CHKERRQ(ierr);
    ierr = MatGetLocalSize(A,&s2,&s3); CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"non-conforming local sizes");
    ierr = MatDiagonalScale(a,ll,0); CHKERRQ(ierr);
    ierr = MatDiagonalScale(b,ll,0); CHKERRQ(ierr);
  }
  if (rr) SETERRQ(PETSC_ERR_SUP,0,"not supported for right vector");
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatZeroRows_MPIBAIJ"
/*
   MatZeroRows_MPIBAIJ - Zero the (global, scalar-indexed) rows listed in is,
   optionally placing *diag on the diagonal.  Rows may be specified on any
   process; each index is routed to its owner with nonblocking MPI sends and
   receives before the local rows are zeroed.
*/
int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag)
{
  Mat_MPIBAIJ    *l = (Mat_MPIBAIJ *) A->data;
  int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
  int            *procs,*nprocs,j,found,idx,nsends,*work,row;
  int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int            *rvalues,tag = A->tag,count,base,slen,n,*source;
  int            *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;

  PetscFunctionBegin;
  ierr = ISGetSize(is,&N); CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
  for ( i=0; i<N; i++ ) {
    idx = rows[i];
    found = 0;
    /* owners[] holds block-row boundaries, so compare against owners[j]*bs */
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work   = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  ierr   = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
  nrecvs = work[rank];
  ierr   = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
  nmax   = work[rank];
  PetscFree(work);

  /* post receives:   */
  rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  /* bucket the row indices by destination process */
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ISRestoreIndices(is,&rows);

  /* starts[] was consumed above; rebuild it before issuing the sends */
  starts[0] = 0;
  for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  PetscFree(starts);

  /* first scalar row owned by this process */
  base = owners[rank]*bs;

  /*  wait on receives */
  lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex]  = recv_status.MPI_SOURCE;
    lens[imdex]  = n;
    slen += n;
    count--;
  }
  PetscFree(recv_waits);

  /* move the data into the send scatter */
  lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      /* convert received global row numbers to local row numbers */
      lrows[count++] = values[j] - base;
    }
  }
  PetscFree(rvalues); PetscFree(lens);
  PetscFree(owner); PetscFree(nprocs);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PLogObjectParent(A,istmp);

  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

       Contributed by: Mathew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
  ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
  if (diag && (l->A->M == l->A->N)) {
    /* square diagonal block: the sub-matrix can place the diagonal itself */
    ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
  } else if (diag) {
    ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr);
    if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,0,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    /* set the diagonal entries one at a time, then re-assemble */
    for ( i = 0; i < slen; i++ ) {
      row  = lrows[i] + rstart_bs;
      ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr);
  }

  ierr = ISDestroy(istmp); CHKERRQ(ierr);
  PetscFree(lrows);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    PetscFree(send_status);
  }
  PetscFree(send_waits); PetscFree(svalues);

  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatPrintHelp_MPIBAIJ"
/*
   MatPrintHelp_MPIBAIJ - Print runtime options for this format; the
   sequential options are printed by process 0, and the parallel-specific
   text only once per run (static guard).
*/
int MatPrintHelp_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
  MPI_Comm    comm = A->comm;
  static int  called = 0;
  int         ierr;

  PetscFunctionBegin;
  if (!a->rank) {
    ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr);
  }
  if (called) {PetscFunctionReturn(0);} else called = 1;
  (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");
  (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatSetUnfactored_MPIBAIJ"
/* Clear the factored state; only the diagonal block carries factorization. */
int MatSetUnfactored_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static int MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);

/* -------------------------------------------------------------------*/
/* Function table for MATMPIBAIJ; entries are positional (order defined by
   struct _MatOps), a 0 marks an unimplemented operation. */
static struct _MatOps MatOps_Values = {
  MatSetValues_MPIBAIJ,
  MatGetRow_MPIBAIJ,
  MatRestoreRow_MPIBAIJ,
  MatMult_MPIBAIJ,
  MatMultAdd_MPIBAIJ,
  MatMultTrans_MPIBAIJ,
  MatMultTransAdd_MPIBAIJ,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  MatTranspose_MPIBAIJ,
  MatGetInfo_MPIBAIJ,
  0,
  MatGetDiagonal_MPIBAIJ,
  MatDiagonalScale_MPIBAIJ,
  MatNorm_MPIBAIJ,
  MatAssemblyBegin_MPIBAIJ,
  MatAssemblyEnd_MPIBAIJ,
  0,
  MatSetOption_MPIBAIJ,
  MatZeroEntries_MPIBAIJ,
  MatZeroRows_MPIBAIJ,
  0,
  0,
  0,
  0,
  MatGetSize_MPIBAIJ,
  MatGetLocalSize_MPIBAIJ,
  MatGetOwnershipRange_MPIBAIJ,
  0,
  0,
  0,
  0,
  MatDuplicate_MPIBAIJ,
  0,
  0,
  0,
  0,
  0,
  MatGetSubMatrices_MPIBAIJ,
  MatIncreaseOverlap_MPIBAIJ,
  MatGetValues_MPIBAIJ,
  0,
  MatPrintHelp_MPIBAIJ,
  MatScale_MPIBAIJ,
  0,
  0,
  0,
  MatGetBlockSize_MPIBAIJ,
  0,
  0,
  0,
  0,
  0,
  0,
  MatSetUnfactored_MPIBAIJ,
  0,
  MatSetValuesBlocked_MPIBAIJ,
  0,
  0,
  0,
  MatGetMaps_Petsc};


EXTERN_C_BEGIN
#undef __FUNC__
#define __FUNC__ "MatGetDiagonalBlock_MPIBAIJ"
/* Hand back the local diagonal block by reference (no copy is made). */
int MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
{
  PetscFunctionBegin;
  *a      = ((Mat_MPIBAIJ *)A->data)->A;
  *iscopy = PETSC_FALSE;
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNC__
#define __FUNC__ "MatCreateMPIBAIJ"
/*@C
   MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
   (block compressed row).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters 
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs   - size of block
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
           This value should be the same as the local size used in creating the 
           y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
           This value should be the same as the local size used in creating the 
           x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz  - number of block nonzeros per block row in diagonal portion of local 
           submatrix  (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows 
           in the diagonal portion of the local submatrix (possibly different for each block
           row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
.  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
           submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or PETSC_NULL.

   Output Parameter:
.  A - the matrix 

   Options Database Keys:
.
-mat_no_unroll - uses code that does not unroll the loops in the 1866 block calculations (much slower) 1867 . -mat_block_size - size of the blocks to use 1868 . -mat_mpi - use the parallel matrix data structures even on one processor 1869 (defaults to using SeqBAIJ format on one processor) 1870 1871 Notes: 1872 The user MUST specify either the local or global matrix dimensions 1873 (possibly both). 1874 1875 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1876 than it must be used on all processors that share the object for that argument. 1877 1878 Storage Information: 1879 For a square global matrix we define each processor's diagonal portion 1880 to be its local rows and the corresponding columns (a square submatrix); 1881 each processor's off-diagonal portion encompasses the remainder of the 1882 local matrix (a rectangular submatrix). 1883 1884 The user can specify preallocated storage for the diagonal part of 1885 the local submatrix with either d_nz or d_nnz (not both). Set 1886 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1887 memory allocation. Likewise, specify preallocated storage for the 1888 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1889 1890 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1891 the figure below we depict these three local rows and all columns (0-11). 1892 1893 .vb 1894 0 1 2 3 4 5 6 7 8 9 10 11 1895 ------------------- 1896 row 3 | o o o d d d o o o o o o 1897 row 4 | o o o d d d o o o o o o 1898 row 5 | o o o d d d o o o o o o 1899 ------------------- 1900 .ve 1901 1902 Thus, any entries in the d locations are stored in the d (diagonal) 1903 submatrix, and any entries in the o locations are stored in the 1904 o (off-diagonal) submatrix. Note that the d and the o submatrices are 1905 stored simply in the MATSEQBAIJ format for compressed row storage. 
1906 1907 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 1908 and o_nz should indicate the number of block nonzeros per row in the o matrix. 1909 In general, for PDE problems in which most nonzeros are near the diagonal, 1910 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1911 or you will get TERRIBLE performance; see the users' manual chapter on 1912 matrices. 1913 1914 Level: intermediate 1915 1916 .keywords: matrix, block, aij, compressed row, sparse, parallel 1917 1918 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIAIJ() 1919 @*/ 1920 int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N, 1921 int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 1922 { 1923 Mat B; 1924 Mat_MPIBAIJ *b; 1925 int ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size,flg; 1926 int flag1 = 0,flag2 = 0; 1927 1928 PetscFunctionBegin; 1929 if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid block size specified, must be positive"); 1930 1931 MPI_Comm_size(comm,&size); 1932 ierr = OptionsHasName(PETSC_NULL,"-mat_mpibaij",&flag1); CHKERRQ(ierr); 1933 ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2); CHKERRQ(ierr); 1934 if (!flag1 && !flag2 && size == 1) { 1935 if (M == PETSC_DECIDE) M = m; 1936 if (N == PETSC_DECIDE) N = n; 1937 ierr = MatCreateSeqBAIJ(comm,bs,M,N,d_nz,d_nnz,A); CHKERRQ(ierr); 1938 PetscFunctionReturn(0); 1939 } 1940 1941 *A = 0; 1942 PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",comm,MatDestroy,MatView); 1943 PLogObjectCreate(B); 1944 B->data = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b); 1945 PetscMemzero(b,sizeof(Mat_MPIBAIJ)); 1946 PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps)); 1947 1948 B->ops->destroy = MatDestroy_MPIBAIJ; 1949 B->ops->view = MatView_MPIBAIJ; 1950 B->mapping = 0; 1951 B->factor = 0; 1952 B->assembled = PETSC_FALSE; 1953 1954 B->insertmode = NOT_SET_VALUES; 1955 MPI_Comm_rank(comm,&b->rank); 1956 
MPI_Comm_size(comm,&b->size); 1957 1958 if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) { 1959 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 1960 } 1961 if ( M == PETSC_DECIDE && m == PETSC_DECIDE) { 1962 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either M or m should be specified"); 1963 } 1964 if ( N == PETSC_DECIDE && n == PETSC_DECIDE) { 1965 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either N or n should be specified"); 1966 } 1967 if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE; 1968 if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE; 1969 1970 if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 1971 work[0] = m; work[1] = n; 1972 mbs = m/bs; nbs = n/bs; 1973 ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr); 1974 if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;} 1975 if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;} 1976 } 1977 if (m == PETSC_DECIDE) { 1978 Mbs = M/bs; 1979 if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global rows must be divisible by blocksize"); 1980 mbs = Mbs/b->size + ((Mbs % b->size) > b->rank); 1981 m = mbs*bs; 1982 } 1983 if (n == PETSC_DECIDE) { 1984 Nbs = N/bs; 1985 if (Nbs*bs != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global cols must be divisible by blocksize"); 1986 nbs = Nbs/b->size + ((Nbs % b->size) > b->rank); 1987 n = nbs*bs; 1988 } 1989 if (mbs*bs != m || nbs*bs != n) { 1990 SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of local rows, cols must be divisible by blocksize"); 1991 } 1992 1993 b->m = m; B->m = m; 1994 b->n = n; B->n = n; 1995 b->N = N; B->N = N; 1996 b->M = M; B->M = M; 1997 b->bs = bs; 1998 b->bs2 = bs*bs; 1999 b->mbs = mbs; 2000 b->nbs = nbs; 2001 b->Mbs = Mbs; 2002 b->Nbs = Nbs; 2003 2004 /* the information in the maps duplicates the information computed below, eventually 2005 we should remove the duplicate information that is not contained in the maps */ 2006 ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr); 2007 ierr = 
MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr); 2008 2009 /* build local table of row and column ownerships */ 2010 b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners); 2011 PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 2012 b->cowners = b->rowners + b->size + 2; 2013 ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2014 b->rowners[0] = 0; 2015 for ( i=2; i<=b->size; i++ ) { 2016 b->rowners[i] += b->rowners[i-1]; 2017 } 2018 b->rstart = b->rowners[b->rank]; 2019 b->rend = b->rowners[b->rank+1]; 2020 b->rstart_bs = b->rstart * bs; 2021 b->rend_bs = b->rend * bs; 2022 2023 ierr = MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2024 b->cowners[0] = 0; 2025 for ( i=2; i<=b->size; i++ ) { 2026 b->cowners[i] += b->cowners[i-1]; 2027 } 2028 b->cstart = b->cowners[b->rank]; 2029 b->cend = b->cowners[b->rank+1]; 2030 b->cstart_bs = b->cstart * bs; 2031 b->cend_bs = b->cend * bs; 2032 2033 2034 if (d_nz == PETSC_DEFAULT) d_nz = 5; 2035 ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr); 2036 PLogObjectParent(B,b->A); 2037 if (o_nz == PETSC_DEFAULT) o_nz = 0; 2038 ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr); 2039 PLogObjectParent(B,b->B); 2040 2041 /* build cache for off array entries formed */ 2042 ierr = StashCreate_Private(comm,1,bs,&b->stash); CHKERRQ(ierr); 2043 ierr = StashCreate_Private(comm,bs,bs,&b->bstash); CHKERRQ(ierr); 2044 b->donotstash = 0; 2045 b->colmap = 0; 2046 b->garray = 0; 2047 b->roworiented = 1; 2048 2049 /* stuff used in block assembly */ 2050 b->barray = 0; 2051 2052 /* stuff used for matrix vector multiply */ 2053 b->lvec = 0; 2054 b->Mvctx = 0; 2055 2056 /* stuff for MatGetRow() */ 2057 b->rowindices = 0; 2058 b->rowvalues = 0; 2059 b->getrowactive = PETSC_FALSE; 2060 2061 /* hash table stuff */ 2062 b->ht = 0; 2063 b->hd = 0; 2064 b->ht_size = 0; 2065 
b->ht_flag = 0; 2066 b->ht_fact = 0; 2067 b->ht_total_ct = 0; 2068 b->ht_insert_ct = 0; 2069 2070 *A = B; 2071 ierr = OptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg); CHKERRQ(ierr); 2072 if (flg) { 2073 double fact = 1.39; 2074 ierr = MatSetOption(B,MAT_USE_HASH_TABLE); CHKERRQ(ierr); 2075 ierr = OptionsGetDouble(PETSC_NULL,"-mat_use_hash_table",&fact,&flg); CHKERRQ(ierr); 2076 if (fact <= 1.0) fact = 1.39; 2077 ierr = MatMPIBAIJSetHashTableFactor(B,fact); CHKERRQ(ierr); 2078 PLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact); 2079 } 2080 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C", 2081 "MatGetDiagonalBlock_MPIBAIJ", 2082 (void*)MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 2083 PetscFunctionReturn(0); 2084 } 2085 2086 #undef __FUNC__ 2087 #define __FUNC__ "MatDuplicate_MPIBAIJ" 2088 static int MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2089 { 2090 Mat mat; 2091 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data; 2092 int ierr, len=0, flg; 2093 2094 PetscFunctionBegin; 2095 *newmat = 0; 2096 PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",matin->comm,MatDestroy,MatView); 2097 PLogObjectCreate(mat); 2098 mat->data = (void *) (a = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(a); 2099 PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps)); 2100 mat->ops->destroy = MatDestroy_MPIBAIJ; 2101 mat->ops->view = MatView_MPIBAIJ; 2102 mat->factor = matin->factor; 2103 mat->assembled = PETSC_TRUE; 2104 mat->insertmode = NOT_SET_VALUES; 2105 2106 a->m = mat->m = oldmat->m; 2107 a->n = mat->n = oldmat->n; 2108 a->M = mat->M = oldmat->M; 2109 a->N = mat->N = oldmat->N; 2110 2111 a->bs = oldmat->bs; 2112 a->bs2 = oldmat->bs2; 2113 a->mbs = oldmat->mbs; 2114 a->nbs = oldmat->nbs; 2115 a->Mbs = oldmat->Mbs; 2116 a->Nbs = oldmat->Nbs; 2117 2118 a->rstart = oldmat->rstart; 2119 a->rend = oldmat->rend; 2120 a->cstart = oldmat->cstart; 2121 a->cend = oldmat->cend; 2122 a->size = 
oldmat->size; 2123 a->rank = oldmat->rank; 2124 a->donotstash = oldmat->donotstash; 2125 a->roworiented = oldmat->roworiented; 2126 a->rowindices = 0; 2127 a->rowvalues = 0; 2128 a->getrowactive = PETSC_FALSE; 2129 a->barray = 0; 2130 a->rstart_bs = oldmat->rstart_bs; 2131 a->rend_bs = oldmat->rend_bs; 2132 a->cstart_bs = oldmat->cstart_bs; 2133 a->cend_bs = oldmat->cend_bs; 2134 2135 /* hash table stuff */ 2136 a->ht = 0; 2137 a->hd = 0; 2138 a->ht_size = 0; 2139 a->ht_flag = oldmat->ht_flag; 2140 a->ht_fact = oldmat->ht_fact; 2141 a->ht_total_ct = 0; 2142 a->ht_insert_ct = 0; 2143 2144 2145 a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 2146 PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 2147 a->cowners = a->rowners + a->size + 2; 2148 PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int)); 2149 ierr = StashCreate_Private(matin->comm,1,oldmat->bs,&a->stash); CHKERRQ(ierr); 2150 ierr = StashCreate_Private(matin->comm,oldmat->bs,oldmat->bs,&a->bstash); CHKERRQ(ierr); 2151 if (oldmat->colmap) { 2152 #if defined (USE_CTABLE) 2153 ierr = TableCreateCopy(oldmat->colmap,&a->colmap); CHKERRQ(ierr); 2154 #else 2155 a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap); 2156 PLogObjectMemory(mat,(a->Nbs)*sizeof(int)); 2157 PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int)); 2158 #endif 2159 } else a->colmap = 0; 2160 if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) { 2161 a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray); 2162 PLogObjectMemory(mat,len*sizeof(int)); 2163 PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); 2164 } else a->garray = 0; 2165 2166 ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 2167 PLogObjectParent(mat,a->lvec); 2168 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 2169 2170 PLogObjectParent(mat,a->Mvctx); 2171 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A); 
CHKERRQ(ierr); 2172 PLogObjectParent(mat,a->A); 2173 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B); CHKERRQ(ierr); 2174 PLogObjectParent(mat,a->B); 2175 ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr); 2176 ierr = FListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr); 2177 if (flg) { 2178 ierr = MatPrintHelp(mat); CHKERRQ(ierr); 2179 } 2180 *newmat = mat; 2181 PetscFunctionReturn(0); 2182 } 2183 2184 #include "sys.h" 2185 2186 #undef __FUNC__ 2187 #define __FUNC__ "MatLoad_MPIBAIJ" 2188 int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat) 2189 { 2190 Mat A; 2191 int i, nz, ierr, j,rstart, rend, fd; 2192 Scalar *vals,*buf; 2193 MPI_Comm comm = ((PetscObject)viewer)->comm; 2194 MPI_Status status; 2195 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 2196 int *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf; 2197 int flg,tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows; 2198 int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2199 int dcount,kmax,k,nzcount,tmp; 2200 2201 PetscFunctionBegin; 2202 ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr); 2203 2204 MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 2205 if (!rank) { 2206 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 2207 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr); 2208 if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object"); 2209 if (header[3] < 0) { 2210 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as MPIBAIJ"); 2211 } 2212 } 2213 2214 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 2215 M = header[1]; N = header[2]; 2216 2217 if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices"); 2218 2219 /* 2220 This code adds extra rows to make sure the number of rows is 2221 divisible by the blocksize 2222 */ 2223 Mbs = M/bs; 2224 extra_rows = bs - M + bs*(Mbs); 2225 if (extra_rows == 
bs) extra_rows = 0; 2226 else Mbs++; 2227 if (extra_rows &&!rank) { 2228 PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n"); 2229 } 2230 2231 /* determine ownership of all rows */ 2232 mbs = Mbs/size + ((Mbs % size) > rank); 2233 m = mbs * bs; 2234 rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int)); CHKPTRQ(rowners); 2235 browners = rowners + size + 1; 2236 ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2237 rowners[0] = 0; 2238 for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1]; 2239 for ( i=0; i<=size; i++ ) browners[i] = rowners[i]*bs; 2240 rstart = rowners[rank]; 2241 rend = rowners[rank+1]; 2242 2243 /* distribute row lengths to all processors */ 2244 locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) ); CHKPTRQ(locrowlens); 2245 if (!rank) { 2246 rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) ); CHKPTRQ(rowlengths); 2247 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr); 2248 for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 2249 sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 2250 for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i]; 2251 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2252 PetscFree(sndcounts); 2253 } else { 2254 ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm);CHKERRQ(ierr); 2255 } 2256 2257 if (!rank) { 2258 /* calculate the number of nonzeros on each processor */ 2259 procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 2260 PetscMemzero(procsnz,size*sizeof(int)); 2261 for ( i=0; i<size; i++ ) { 2262 for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) { 2263 procsnz[i] += rowlengths[j]; 2264 } 2265 } 2266 PetscFree(rowlengths); 2267 2268 /* determine max buffer needed and allocate it */ 2269 maxnz = 0; 2270 for ( i=0; i<size; i++ ) { 2271 maxnz = PetscMax(maxnz,procsnz[i]); 2272 } 2273 cols = 
(int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 2274 2275 /* read in my part of the matrix column indices */ 2276 nz = procsnz[0]; 2277 ibuf = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf); 2278 mycols = ibuf; 2279 if (size == 1) nz -= extra_rows; 2280 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr); 2281 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2282 2283 /* read in every ones (except the last) and ship off */ 2284 for ( i=1; i<size-1; i++ ) { 2285 nz = procsnz[i]; 2286 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr); 2287 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 2288 } 2289 /* read in the stuff for the last proc */ 2290 if ( size != 1 ) { 2291 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2292 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr); 2293 for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i; 2294 ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr); 2295 } 2296 PetscFree(cols); 2297 } else { 2298 /* determine buffer space needed for message */ 2299 nz = 0; 2300 for ( i=0; i<m; i++ ) { 2301 nz += locrowlens[i]; 2302 } 2303 ibuf = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf); 2304 mycols = ibuf; 2305 /* receive message of column indices*/ 2306 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2307 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2308 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 2309 } 2310 2311 /* loop over local rows, determining number of off diagonal entries */ 2312 dlens = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) ); CHKPTRQ(dlens); 2313 odlens = dlens + (rend-rstart); 2314 mask = (int *) PetscMalloc( 3*Mbs*sizeof(int) ); CHKPTRQ(mask); 2315 PetscMemzero(mask,3*Mbs*sizeof(int)); 2316 masked1 = mask + Mbs; 2317 masked2 = masked1 + Mbs; 2318 rowcount = 0; nzcount = 0; 2319 for ( i=0; i<mbs; i++ ) { 2320 dcount 
= 0; 2321 odcount = 0; 2322 for ( j=0; j<bs; j++ ) { 2323 kmax = locrowlens[rowcount]; 2324 for ( k=0; k<kmax; k++ ) { 2325 tmp = mycols[nzcount++]/bs; 2326 if (!mask[tmp]) { 2327 mask[tmp] = 1; 2328 if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp; 2329 else masked1[dcount++] = tmp; 2330 } 2331 } 2332 rowcount++; 2333 } 2334 2335 dlens[i] = dcount; 2336 odlens[i] = odcount; 2337 2338 /* zero out the mask elements we set */ 2339 for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0; 2340 for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0; 2341 } 2342 2343 /* create our matrix */ 2344 ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat); 2345 CHKERRQ(ierr); 2346 A = *newmat; 2347 MatSetOption(A,MAT_COLUMNS_SORTED); 2348 2349 if (!rank) { 2350 buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(buf); 2351 /* read in my part of the matrix numerical values */ 2352 nz = procsnz[0]; 2353 vals = buf; 2354 mycols = ibuf; 2355 if (size == 1) nz -= extra_rows; 2356 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 2357 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2358 2359 /* insert into matrix */ 2360 jj = rstart*bs; 2361 for ( i=0; i<m; i++ ) { 2362 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2363 mycols += locrowlens[i]; 2364 vals += locrowlens[i]; 2365 jj++; 2366 } 2367 /* read in other processors (except the last one) and ship out */ 2368 for ( i=1; i<size-1; i++ ) { 2369 nz = procsnz[i]; 2370 vals = buf; 2371 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 2372 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2373 } 2374 /* the last proc */ 2375 if ( size != 1 ){ 2376 nz = procsnz[i] - extra_rows; 2377 vals = buf; 2378 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 2379 for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0; 2380 ierr = 
MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 2381 } 2382 PetscFree(procsnz); 2383 } else { 2384 /* receive numeric values */ 2385 buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(buf); 2386 2387 /* receive message of values*/ 2388 vals = buf; 2389 mycols = ibuf; 2390 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2391 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2392 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 2393 2394 /* insert into matrix */ 2395 jj = rstart*bs; 2396 for ( i=0; i<m; i++ ) { 2397 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2398 mycols += locrowlens[i]; 2399 vals += locrowlens[i]; 2400 jj++; 2401 } 2402 } 2403 PetscFree(locrowlens); 2404 PetscFree(buf); 2405 PetscFree(ibuf); 2406 PetscFree(rowners); 2407 PetscFree(dlens); 2408 PetscFree(mask); 2409 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 2410 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 2411 PetscFunctionReturn(0); 2412 } 2413 2414 2415 2416 #undef __FUNC__ 2417 #define __FUNC__ "MatMPIBAIJSetHashTableFactor" 2418 /*@ 2419 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2420 2421 Input Parameters: 2422 . mat - the matrix 2423 . fact - factor 2424 2425 Collective on Mat 2426 2427 Notes: 2428 This can also be set by the command line option: -mat_use_hash_table fact 2429 2430 .keywords: matrix, hashtable, factor, HT 2431 2432 .seealso: MatSetOption() 2433 @*/ 2434 int MatMPIBAIJSetHashTableFactor(Mat mat,double fact) 2435 { 2436 Mat_MPIBAIJ *baij; 2437 2438 PetscFunctionBegin; 2439 PetscValidHeaderSpecific(mat,MAT_COOKIE); 2440 if (mat->type != MATMPIBAIJ) { 2441 SETERRQ(PETSC_ERR_ARG_WRONG,1,"Incorrect matrix type. Use MPIBAIJ only."); 2442 } 2443 baij = (Mat_MPIBAIJ*) mat->data; 2444 baij->ht_fact = fact; 2445 PetscFunctionReturn(0); 2446 } 2447