1 #ifdef PETSC_RCS_HEADER 2 static char vcid[] = "$Id: mpibaij.c,v 1.159 1999/03/11 23:21:46 balay Exp bsmith $"; 3 #endif 4 5 #include "src/mat/impls/baij/mpi/mpibaij.h" /*I "mat.h" I*/ 6 #include "src/vec/vecimpl.h" 7 8 extern int MatSetUpMultiply_MPIBAIJ(Mat); 9 extern int DisAssemble_MPIBAIJ(Mat); 10 extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int); 11 extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **); 12 extern int MatGetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *); 13 extern int MatSetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *,InsertMode); 14 extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 15 extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 16 extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 17 extern int MatPrintHelp_SeqBAIJ(Mat); 18 19 /* 20 Local utility routine that creates a mapping from the global column 21 number to the local number in the off-diagonal part of the local 22 storage of the matrix. This is done in a non scable way since the 23 length of colmap equals the global matrix length. 24 */ 25 #undef __FUNC__ 26 #define __FUNC__ "CreateColmap_MPIBAIJ_Private" 27 static int CreateColmap_MPIBAIJ_Private(Mat mat) 28 { 29 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 30 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data; 31 int nbs = B->nbs,i,bs=B->bs; 32 #if defined (USE_CTABLE) 33 int ierr; 34 #endif 35 36 PetscFunctionBegin; 37 #if defined (USE_CTABLE) 38 ierr = TableCreate(baij->nbs/5,&baij->colmap); CHKERRQ(ierr); 39 for ( i=0; i<nbs; i++ ){ 40 ierr = TableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr); 41 } 42 #else 43 baij->colmap = (int *) PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap); 44 PLogObjectMemory(mat,baij->Nbs*sizeof(int)); 45 PetscMemzero(baij->colmap,baij->Nbs*sizeof(int)); 46 for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1; 47 #endif 48 PetscFunctionReturn(0); 49 } 50 51 #define CHUNKSIZE 10 52 53 #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 54 { \ 55 \ 56 brow = row/bs; \ 57 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 58 rmax = aimax[brow]; nrow = ailen[brow]; \ 59 bcol = col/bs; \ 60 ridx = row % bs; cidx = col % bs; \ 61 low = 0; high = nrow; \ 62 while (high-low > 3) { \ 63 t = (low+high)/2; \ 64 if (rp[t] > bcol) high = t; \ 65 else low = t; \ 66 } \ 67 for ( _i=low; _i<high; _i++ ) { \ 68 if (rp[_i] > bcol) break; \ 69 if (rp[_i] == bcol) { \ 70 bap = ap + bs2*_i + bs*cidx + ridx; \ 71 if (addv == ADD_VALUES) *bap += value; \ 72 else *bap = value; \ 73 goto a_noinsert; \ 74 } \ 75 } \ 76 if (a->nonew == 1) goto a_noinsert; \ 77 else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 78 if (nrow >= rmax) { \ 79 /* there is no extra room in row, therefore enlarge */ \ 80 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 81 Scalar *new_a; \ 82 \ 83 if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 84 \ 85 /* malloc new storage space */ \ 86 len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); \ 87 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 88 new_j = (int *) (new_a + bs2*new_nz); \ 89 new_i = new_j + new_nz; \ 90 \ 91 /* copy over old data into new slots */ \ 92 for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} \ 93 for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 94 PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); \ 95 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \ 96 PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, \ 97 len*sizeof(int)); \ 98 PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); \ 99 PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \ 100 PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \ 101 aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar)); \ 102 /* free up old matrix storage */ \ 103 PetscFree(a->a); \ 104 if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \ 105 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 106 a->singlemalloc = 1; \ 107 \ 108 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 109 rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \ 110 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \ 111 a->maxnz += bs2*CHUNKSIZE; \ 112 a->reallocs++; \ 113 a->nz++; \ 114 } \ 115 N = nrow++ - 1; \ 116 /* shift up all the later entries in this row */ \ 117 for ( ii=N; ii>=_i; ii-- ) { \ 118 rp[ii+1] = rp[ii]; \ 119 PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \ 120 } \ 121 if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar)); \ 122 rp[_i] = bcol; \ 123 ap[bs2*_i + bs*cidx + ridx] = value; \ 124 a_noinsert:; \ 125 ailen[brow] = nrow; \ 126 } 127 128 #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 129 { \ 130 \ 131 brow = row/bs; \ 132 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 133 rmax = bimax[brow]; nrow = bilen[brow]; \ 134 bcol = col/bs; \ 135 ridx = row % bs; cidx = col % bs; \ 136 low = 0; high = nrow; \ 137 while (high-low > 3) { \ 138 t = (low+high)/2; \ 139 if (rp[t] > bcol) high = t; \ 140 else low = t; \ 141 } \ 142 for ( _i=low; _i<high; _i++ ) { \ 143 if (rp[_i] > bcol) break; \ 144 if (rp[_i] == bcol) { \ 145 bap = ap + bs2*_i + bs*cidx + ridx; \ 146 if (addv == ADD_VALUES) *bap += value; \ 147 else *bap = value; \ 148 goto b_noinsert; \ 149 } \ 150 } \ 151 if (b->nonew == 1) goto b_noinsert; \ 152 else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 153 if (nrow >= rmax) { \ 154 /* there is no extra room in row, therefore enlarge */ \ 155 int new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 156 Scalar *new_a; \ 157 \ 158 if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 159 \ 160 /* malloc new storage space */ \ 161 len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(b->mbs+1)*sizeof(int); \ 162 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 163 new_j = (int *) (new_a + bs2*new_nz); \ 164 new_i = new_j + new_nz; \ 165 \ 166 /* copy over old data into new slots */ \ 167 for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = bi[ii];} \ 168 for ( ii=brow+1; ii<b->mbs+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 169 PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int)); \ 170 len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \ 171 PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow, \ 172 len*sizeof(int)); \ 173 PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(Scalar)); \ 174 PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \ 175 PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \ 176 ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(Scalar)); \ 177 /* free up old matrix storage */ \ 178 PetscFree(b->a); \ 179 if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \ 180 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 181 b->singlemalloc = 1; \ 182 \ 183 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 184 rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \ 185 PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \ 186 b->maxnz += bs2*CHUNKSIZE; \ 187 b->reallocs++; \ 188 b->nz++; \ 189 } \ 190 N = nrow++ - 1; \ 191 /* shift up all the later entries in this row */ \ 192 for ( ii=N; ii>=_i; ii-- ) { \ 193 rp[ii+1] = rp[ii]; \ 194 PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \ 195 } \ 196 if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar)); \ 197 rp[_i] = bcol; \ 198 ap[bs2*_i + bs*cidx + ridx] = value; \ 199 b_noinsert:; \ 200 bilen[brow] = nrow; \ 201 } 202 203 #undef __FUNC__ 204 #define __FUNC__ "MatSetValues_MPIBAIJ" 205 int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 206 { 207 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 208 Scalar value; 209 int ierr,i,j,row,col; 210 int roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ; 211 int rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs; 212 int cend_orig=baij->cend_bs,bs=baij->bs; 213 214 /* Some Variables required in the macro */ 215 Mat A = baij->A; 216 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) (A)->data; 217 int *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 218 Scalar *aa=a->a; 219 220 Mat B = baij->B; 221 Mat_SeqBAIJ *b = (Mat_SeqBAIJ *) (B)->data; 222 int *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 223 Scalar *ba=b->a; 224 225 int *rp,ii,nrow,_i,rmax,N,brow,bcol; 226 int low,high,t,ridx,cidx,bs2=a->bs2; 227 Scalar *ap,*bap; 228 229 PetscFunctionBegin; 230 for ( i=0; i<m; i++ ) { 231 if (im[i] < 0) continue; 232 #if defined(USE_PETSC_BOPT_g) 233 if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 234 #endif 235 if (im[i] >= rstart_orig && im[i] < rend_orig) { 236 row = im[i] - rstart_orig; 237 for ( j=0; j<n; j++ ) { 238 if (in[j] >= cstart_orig && in[j] < cend_orig){ 239 col = in[j] - cstart_orig; 240 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 241 MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 242 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 243 } else if (in[j] < 0) continue; 244 #if defined(USE_PETSC_BOPT_g) 245 else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Col too large");} 246 #endif 247 else { 248 if (mat->was_assembled) { 249 if (!baij->colmap) { 250 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 251 } 252 #if defined (USE_CTABLE) 253 ierr = TableFind(baij->colmap, in[j]/bs + 1,&col); CHKERRQ(ierr); 254 col = col - 1 + in[j]%bs; 255 #else 256 col = baij->colmap[in[j]/bs] - 1 + in[j]%bs; 257 #endif 258 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 259 ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 260 col = in[j]; 261 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 262 B = baij->B; 263 b = (Mat_SeqBAIJ *) (B)->data; 264 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 265 ba=b->a; 266 } 267 } else col = in[j]; 268 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 269 MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 270 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 271 } 272 } 273 } else { 274 if (roworiented && !baij->donotstash) { 275 ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 276 } else { 277 if (!baij->donotstash) { 278 row = im[i]; 279 for ( j=0; j<n; j++ ) { 280 ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m);CHKERRQ(ierr); 281 } 282 } 283 } 284 } 285 } 286 PetscFunctionReturn(0); 287 } 288 289 #undef __FUNC__ 290 #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ" 291 int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 292 { 293 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 294 Scalar *value,*barray=baij->barray; 295 int ierr,i,j,ii,jj,row,col; 296 int roworiented = baij->roworiented,rstart=baij->rstart ; 297 int rend=baij->rend,cstart=baij->cstart,stepval; 298 int cend=baij->cend,bs=baij->bs,bs2=baij->bs2; 299 300 if(!barray) { 301 baij->barray = barray = (Scalar*) PetscMalloc(bs2*sizeof(Scalar)); CHKPTRQ(barray); 302 } 303 304 if (roworiented) { 305 stepval = (n-1)*bs; 306 } else { 307 stepval = (m-1)*bs; 308 } 309 for ( i=0; i<m; i++ ) { 310 if (im[i] < 0) continue; 311 #if defined(USE_PETSC_BOPT_g) 312 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large, row %d max %d",im[i],baij->Mbs); 313 #endif 314 if (im[i] >= rstart && im[i] < rend) { 315 row = im[i] - rstart; 316 for ( j=0; j<n; j++ ) { 317 /* If NumCol = 1 then a copy is not required */ 318 if ((roworiented) && (n == 1)) { 319 barray = v + i*bs2; 320 } else if((!roworiented) && (m == 1)) { 321 barray = v + j*bs2; 322 } else { /* Here a copy is required */ 323 if (roworiented) { 324 value = v + i*(stepval+bs)*bs + j*bs; 325 } else { 326 value = v + j*(stepval+bs)*bs + i*bs; 327 } 328 for ( ii=0; ii<bs; ii++,value+=stepval ) { 329 for (jj=0; jj<bs; jj++ ) { 330 *barray++ = *value++; 331 } 332 } 333 barray -=bs2; 334 } 335 336 if (in[j] >= cstart && in[j] < cend){ 337 col = in[j] - cstart; 338 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 339 } 340 else if (in[j] < 0) continue; 341 #if defined(USE_PETSC_BOPT_g) 342 else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large, col %d max %d",in[j],baij->Nbs);} 343 #endif 344 else { 345 if (mat->was_assembled) { 346 if (!baij->colmap) { 347 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 348 } 349 350 #if defined(USE_PETSC_BOPT_g) 351 #if defined (USE_CTABLE) 352 { int data; 353 ierr = TableFind(baij->colmap,in[j]+1,&data); CHKERRQ(ierr); 354 if((data - 1) % bs) 355 {SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");} 356 } 357 #else 358 if ((baij->colmap[in[j]] - 1) % bs) {SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");} 359 #endif 360 #endif 361 #if defined (USE_CTABLE) 362 ierr = TableFind(baij->colmap,in[j]+1,&col); CHKERRQ(ierr); 363 col = (col - 1)/bs; 364 #else 365 col = (baij->colmap[in[j]] - 1)/bs; 366 #endif 367 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 368 ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 369 col = in[j]; 370 } 371 } 372 else col = in[j]; 373 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 374 } 375 } 376 } else { 377 if (!baij->donotstash) { 378 /* ierr = StashValuesBlocked_Private(&baij->bstash,im[i],n,in,v+i*bs2*n, 379 roworiented,stepval,addv);CHKERRQ(ierr); */ 380 ierr = StashValuesBlocked_Private(&baij->bstash,im[i],n,in,v,m,n, 381 roworiented,i);CHKERRQ(ierr); 382 } 383 } 384 } 385 PetscFunctionReturn(0); 386 } 387 #define HASH_KEY 0.6180339887 388 /* #define HASH1(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 389 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp))) 390 /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 391 #undef __FUNC__ 392 #define __FUNC__ "MatSetValues_MPIBAIJ_HT" 393 int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 394 { 395 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 396 int ierr,i,j,row,col; 397 int roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ; 398 int rend_orig=baij->rend_bs,Nbs=baij->Nbs; 399 int h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx; 400 double tmp; 401 Scalar ** HD = baij->hd,value; 402 #if defined(USE_PETSC_BOPT_g) 403 int total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 404 #endif 405 406 PetscFunctionBegin; 407 408 for ( i=0; i<m; i++ ) { 409 #if defined(USE_PETSC_BOPT_g) 410 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 411 if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 412 #endif 413 row = im[i]; 414 if (row >= rstart_orig && row < rend_orig) { 415 for ( j=0; j<n; j++ ) { 416 col = in[j]; 417 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 418 /* Look up into the Hash Table */ 419 key = (row/bs)*Nbs+(col/bs)+1; 420 h1 = HASH(size,key,tmp); 421 422 423 idx = h1; 424 #if defined(USE_PETSC_BOPT_g) 425 insert_ct++; 426 total_ct++; 427 if (HT[idx] != key) { 428 for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 429 if (idx == size) { 430 for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 431 if (idx == h1) { 432 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 433 } 434 } 435 } 436 #else 437 if (HT[idx] != key) { 438 for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++); 439 if (idx == size) { 440 for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++); 441 if (idx == h1) { 442 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 443 } 444 } 445 } 446 #endif 447 /* A HASH table entry is found, so insert the values at the correct address */ 448 if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 449 else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 450 } 451 } else { 452 if (roworiented && !baij->donotstash) { 453 ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 454 } else { 455 if (!baij->donotstash) { 456 row = im[i]; 457 for ( j=0; j<n; j++ ) { 458 ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m);CHKERRQ(ierr); 459 } 460 } 461 } 462 } 463 } 464 #if defined(USE_PETSC_BOPT_g) 465 baij->ht_total_ct = total_ct; 466 baij->ht_insert_ct = insert_ct; 467 #endif 468 PetscFunctionReturn(0); 469 } 470 471 #undef __FUNC__ 472 #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ_HT" 473 int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 474 { 475 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 476 int ierr,i,j,ii,jj,row,col,k,l; 477 int roworiented = baij->roworiented,rstart=baij->rstart ; 478 int rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2; 479 int h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 480 double tmp; 481 Scalar ** HD = baij->hd,*value,*v_t,*baij_a; 482 #if defined(USE_PETSC_BOPT_g) 483 int total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 484 #endif 485 486 PetscFunctionBegin; 487 488 if (roworiented) { 489 stepval = (n-1)*bs; 490 } else { 491 stepval = (m-1)*bs; 492 } 493 for ( i=0; i<m; i++ ) { 494 #if defined(USE_PETSC_BOPT_g) 495 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 496 if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 497 #endif 498 row = im[i]; 499 v_t = v + i*bs2; 500 if (row >= rstart && row < rend) { 501 for ( j=0; j<n; j++ ) { 502 col = in[j]; 503 504 /* Look up into the Hash Table */ 505 key = row*Nbs+col+1; 506 h1 = HASH(size,key,tmp); 507 508 idx = h1; 509 #if defined(USE_PETSC_BOPT_g) 510 total_ct++; 511 insert_ct++; 512 if (HT[idx] != key) { 513 for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 514 if (idx == size) { 515 for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 516 if (idx == h1) { 517 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 518 } 519 } 520 } 521 #else 522 if (HT[idx] != key) { 523 for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++); 524 if (idx == size) { 525 for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++); 526 if (idx == h1) { 527 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 528 } 529 } 530 } 531 #endif 532 baij_a = HD[idx]; 533 if (roworiented) { 534 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 535 /* value = v + (i*(stepval+bs)+j)*bs; */ 536 value = v_t; 537 v_t += bs; 538 if (addv == ADD_VALUES) { 539 for ( ii=0; ii<bs; ii++,value+=stepval) { 540 for ( jj=ii; jj<bs2; jj+=bs ) { 541 baij_a[jj] += *value++; 542 } 543 } 544 } else { 545 for ( ii=0; ii<bs; ii++,value+=stepval) { 546 for ( jj=ii; jj<bs2; jj+=bs ) { 547 baij_a[jj] = *value++; 548 } 549 } 550 } 551 } else { 552 value = v + j*(stepval+bs)*bs + i*bs; 553 if (addv == ADD_VALUES) { 554 for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) { 555 for ( jj=0; jj<bs; jj++ ) { 556 baij_a[jj] += *value++; 557 } 558 } 559 } else { 560 for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) { 561 for ( jj=0; jj<bs; jj++ ) { 562 baij_a[jj] = *value++; 563 } 564 } 565 } 566 } 567 } 568 } else { 569 if (!baij->donotstash) { 570 if (roworiented ) { 571 row = im[i]*bs; 572 value = v + i*(stepval+bs)*bs; 573 for ( j=0; j<bs; j++,row++ ) { 574 for ( k=0; k<n; k++ ) { 575 for ( col=in[k]*bs,l=0; l<bs; l++,col++) { 576 ierr = StashValues_Private(&baij->stash,row,1,&col,value++);CHKERRQ(ierr); 577 } 578 } 579 } 580 } else { 581 for ( j=0; j<n; j++ ) { 582 value = v + j*(stepval+bs)*bs + i*bs; 583 col = in[j]*bs; 584 for ( k=0; k<bs; k++,col++,value+=stepval) { 585 for ( row = im[i]*bs,l=0; l<bs; l++,row++) { 586 ierr = StashValues_Private(&baij->stash,row,1,&col,value++);CHKERRQ(ierr); 587 } 588 } 589 } 590 } 591 } 592 } 593 } 594 #if defined(USE_PETSC_BOPT_g) 595 baij->ht_total_ct = total_ct; 596 baij->ht_insert_ct = insert_ct; 597 #endif 598 PetscFunctionReturn(0); 599 } 600 601 #undef __FUNC__ 602 #define __FUNC__ "MatGetValues_MPIBAIJ" 603 int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 604 { 605 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 606 int bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs; 607 int bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col,data; 608 609 PetscFunctionBegin; 610 for ( i=0; i<m; i++ ) { 611 if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 612 if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 613 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 614 row = idxm[i] - bsrstart; 615 for ( j=0; j<n; j++ ) { 616 if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column"); 617 if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large"); 618 if (idxn[j] >= bscstart && idxn[j] < bscend){ 619 col = idxn[j] - bscstart; 620 ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 621 } else { 622 if (!baij->colmap) { 623 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 624 } 625 #if defined (USE_CTABLE) 626 ierr = TableFind(baij->colmap,idxn[j]/bs+1,&data); CHKERRQ(ierr); 627 data --; 628 #else 629 data = baij->colmap[idxn[j]/bs]-1; 630 #endif 631 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 632 else { 633 col = data + idxn[j]%bs; 634 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 635 } 636 } 637 } 638 } else { 639 SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported"); 640 } 641 } 642 PetscFunctionReturn(0); 643 } 644 645 #undef __FUNC__ 646 #define __FUNC__ "MatNorm_MPIBAIJ" 647 int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm) 648 { 649 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 650 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data; 651 int ierr, i,bs2=baij->bs2; 652 double sum = 0.0; 653 Scalar *v; 654 655 PetscFunctionBegin; 656 if (baij->size == 1) { 657 ierr = MatNorm(baij->A,type,norm); CHKERRQ(ierr); 658 } else { 659 if (type == NORM_FROBENIUS) { 660 v = amat->a; 661 for (i=0; i<amat->nz*bs2; i++ ) { 662 #if defined(USE_PETSC_COMPLEX) 663 sum += PetscReal(PetscConj(*v)*(*v)); v++; 664 #else 665 sum += (*v)*(*v); v++; 666 #endif 667 } 668 v = bmat->a; 669 for (i=0; i<bmat->nz*bs2; i++ ) { 670 #if defined(USE_PETSC_COMPLEX) 671 sum += PetscReal(PetscConj(*v)*(*v)); v++; 672 #else 673 sum += (*v)*(*v); v++; 674 #endif 675 } 676 ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 677 *norm = sqrt(*norm); 678 } else { 679 SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet"); 680 } 681 } 682 PetscFunctionReturn(0); 683 } 684 685 686 /* 687 Creates the hash table, and sets the table 688 This table is created only once. 689 If new entried need to be added to the matrix 690 then the hash table has to be destroyed and 691 recreated. 692 */ 693 #undef __FUNC__ 694 #define __FUNC__ "MatCreateHashTable_MPIBAIJ_Private" 695 int MatCreateHashTable_MPIBAIJ_Private(Mat mat,double factor) 696 { 697 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 698 Mat A = baij->A, B=baij->B; 699 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data, *b=(Mat_SeqBAIJ *)B->data; 700 int i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 701 int size,bs2=baij->bs2,rstart=baij->rstart; 702 int cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs; 703 int *HT,key; 704 Scalar **HD; 705 double tmp; 706 #if defined(USE_PETSC_BOPT_g) 707 int ct=0,max=0; 708 #endif 709 710 PetscFunctionBegin; 711 baij->ht_size=(int)(factor*nz); 712 size = baij->ht_size; 713 714 if (baij->ht) { 715 PetscFunctionReturn(0); 716 } 717 718 /* Allocate Memory for Hash Table */ 719 baij->hd = (Scalar**)PetscMalloc((size)*(sizeof(int)+sizeof(Scalar*))+1); CHKPTRQ(baij->hd); 720 baij->ht = (int*)(baij->hd + size); 721 HD = baij->hd; 722 HT = baij->ht; 723 724 725 PetscMemzero(HD,size*(sizeof(int)+sizeof(Scalar*))); 726 727 728 /* Loop Over A */ 729 for ( i=0; i<a->mbs; i++ ) { 730 for ( j=ai[i]; j<ai[i+1]; j++ ) { 731 row = i+rstart; 732 col = aj[j]+cstart; 733 734 key = row*Nbs + col + 1; 735 h1 = HASH(size,key,tmp); 736 for ( k=0; k<size; k++ ){ 737 if (HT[(h1+k)%size] == 0.0) { 738 HT[(h1+k)%size] = key; 739 HD[(h1+k)%size] = a->a + j*bs2; 740 break; 741 #if defined(USE_PETSC_BOPT_g) 742 } else { 743 ct++; 744 #endif 745 } 746 } 747 #if defined(USE_PETSC_BOPT_g) 748 if (k> max) max = k; 749 #endif 750 } 751 } 752 /* Loop Over B */ 753 for ( i=0; i<b->mbs; i++ ) { 754 for ( j=bi[i]; j<bi[i+1]; j++ ) { 755 row = i+rstart; 756 col = garray[bj[j]]; 757 key = row*Nbs + col + 1; 758 h1 = HASH(size,key,tmp); 759 for ( k=0; k<size; k++ ){ 760 if (HT[(h1+k)%size] == 0.0) { 761 HT[(h1+k)%size] = key; 762 HD[(h1+k)%size] = b->a + j*bs2; 763 break; 764 #if defined(USE_PETSC_BOPT_g) 765 } else { 766 ct++; 767 #endif 768 } 769 } 770 #if defined(USE_PETSC_BOPT_g) 771 if (k> max) max = k; 772 #endif 773 } 774 } 775 776 /* Print Summary */ 777 #if defined(USE_PETSC_BOPT_g) 778 for ( i=0,j=0; i<size; i++) 779 if (HT[i]) {j++;} 780 PLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n", 781 (j== 0)? 0.0:((double)(ct+j))/j,max); 782 #endif 783 PetscFunctionReturn(0); 784 } 785 786 #undef __FUNC__ 787 #define __FUNC__ "MatAssemblyBegin_MPIBAIJ" 788 int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 789 { 790 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 791 int ierr; 792 InsertMode addv; 793 794 PetscFunctionBegin; 795 if (baij->donotstash) { 796 PetscFunctionReturn(0); 797 } 798 799 /* make sure all processors are either in INSERTMODE or ADDMODE */ 800 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 801 if (addv == (ADD_VALUES|INSERT_VALUES)) { 802 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added"); 803 } 804 mat->insertmode = addv; /* in case this processor had no cache */ 805 806 ierr = StashScatterBegin_Private(&baij->stash,baij->rowners); CHKERRQ(ierr); 807 ierr = StashScatterBegin_Private(&baij->bstash,baij->rowners); CHKERRQ(ierr); 808 809 /* Free cache space */ 810 PLogInfo(baij->A,"MatAssemblyBegin_MPIBAIJ:Number of off-processor values %d\n",baij->stash.n); 811 PetscFunctionReturn(0); 812 } 813 814 #undef __FUNC__ 815 #define __FUNC__ "MatAssemblyEnd_MPIBAIJ" 816 int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 817 { 818 Mat_MPIBAIJ *baij=(Mat_MPIBAIJ *) mat->data; 819 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data; 820 int i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2; 821 int *row,*col,other_disassembled,r1,r2,r3; 822 Scalar *val; 823 InsertMode addv = mat->insertmode; 824 825 PetscFunctionBegin; 826 if (!baij->donotstash) { 827 while (1) { 828 ierr = StashScatterGetMesg_Private(&baij->stash,&n,&row,&col,&val,&flg); CHKERRQ(ierr); 829 if (!flg) break; 830 831 for ( i=0; i<n; ) { 832 /* Now identify the consecutive vals belonging to the same row */ 833 for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; } 834 if (j < n) ncols = j-i; 835 else ncols = n-i; 836 /* Now assemble all these values with a single function call */ 837 ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv); CHKERRQ(ierr); 838 i = j; 839 } 840 } 841 ierr = StashScatterEnd_Private(&baij->stash); CHKERRQ(ierr); 842 /* Now process the block-stash. Since the values are stashed column-oriented, 843 set the roworiented flag to column oriented, and after MatSetValues() 844 restore the original flags */ 845 r1 = baij->roworiented; 846 r2 = a->roworiented; 847 r3 = b->roworiented; 848 baij->roworiented = 0; 849 a->roworiented = 0; 850 b->roworiented = 0; 851 while (1) { 852 ierr = StashScatterGetMesg_Private(&baij->bstash,&n,&row,&col,&val,&flg); CHKERRQ(ierr); 853 if (!flg) break; 854 855 for ( i=0; i<n; ) { 856 /* Now identify the consecutive vals belonging to the same row */ 857 for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; } 858 if (j < n) ncols = j-i; 859 else ncols = n-i; 860 ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv); CHKERRQ(ierr); 861 i = j; 862 } 863 } 864 ierr = StashScatterEnd_Private(&baij->bstash); CHKERRQ(ierr); 865 baij->roworiented = r1; 866 a->roworiented = r2; 867 b->roworiented = r3; 868 } 869 870 ierr = MatAssemblyBegin(baij->A,mode); CHKERRQ(ierr); 871 ierr = MatAssemblyEnd(baij->A,mode); CHKERRQ(ierr); 872 873 /* determine if any processor has disassembled, if so we must 874 also disassemble ourselfs, in order that we may reassemble. */ 875 /* 876 if nonzero structure of submatrix B cannot change then we know that 877 no processor disassembled thus we can skip this stuff 878 */ 879 if (!((Mat_SeqBAIJ*) baij->B->data)->nonew) { 880 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 881 if (mat->was_assembled && !other_disassembled) { 882 ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 883 } 884 } 885 886 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 887 ierr = MatSetUpMultiply_MPIBAIJ(mat); CHKERRQ(ierr); 888 } 889 ierr = MatAssemblyBegin(baij->B,mode); CHKERRQ(ierr); 890 ierr = MatAssemblyEnd(baij->B,mode); CHKERRQ(ierr); 891 892 #if defined(USE_PETSC_BOPT_g) 893 if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 894 PLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n", 895 ((double)baij->ht_total_ct)/baij->ht_insert_ct); 896 baij->ht_total_ct = 0; 897 baij->ht_insert_ct = 0; 898 } 899 #endif 900 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 901 ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact); CHKERRQ(ierr); 902 mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 903 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 904 } 905 906 if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;} 907 PetscFunctionReturn(0); 908 } 909 910 #undef __FUNC__ 911 #define __FUNC__ "MatView_MPIBAIJ_Binary" 912 static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer) 913 { 914 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 915 int ierr; 916 917 PetscFunctionBegin; 918 if (baij->size == 1) { 919 ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 920 } else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported"); 921 PetscFunctionReturn(0); 922 } 923 924 #undef __FUNC__ 925 #define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworSocket" 926 static int MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer) 927 { 928 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 929 int ierr, format,bs = baij->bs, size = baij->size, rank = baij->rank; 930 FILE *fd; 931 ViewerType vtype; 932 933 PetscFunctionBegin; 934 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 935 if (PetscTypeCompare(vtype,ASCII_VIEWER)) { 936 ierr = ViewerGetFormat(viewer,&format); CHKERRQ(ierr); 937 if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 938 MatInfo info; 939 MPI_Comm_rank(mat->comm,&rank); 940 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 941 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 942 PetscSequentialPhaseBegin(mat->comm,1); 943 fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n", 944 rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs, 945 baij->bs,(int)info.memory); 946 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info); 947 fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs); 948 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info); 949 fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs); 950 fflush(fd); 951 PetscSequentialPhaseEnd(mat->comm,1); 952 ierr = VecScatterView(baij->Mvctx,viewer); CHKERRQ(ierr); 953 PetscFunctionReturn(0); 954 } else if (format == VIEWER_FORMAT_ASCII_INFO) { 955 PetscPrintf(mat->comm," block size is %d\n",bs); 956 PetscFunctionReturn(0); 957 } 958 } 959 960 if (PetscTypeCompare(vtype,DRAW_VIEWER)) { 961 Draw draw; 962 PetscTruth isnull; 963 ierr = ViewerDrawGetDraw(viewer,0,&draw); CHKERRQ(ierr); 964 ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 965 } 966 967 if (size == 1) { 968 ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 969 } else { 970 /* assemble the entire matrix onto first processor. */ 971 Mat A; 972 Mat_SeqBAIJ *Aloc; 973 int M = baij->M, N = baij->N,*ai,*aj,col,i,j,k,*rvals; 974 int mbs = baij->mbs; 975 Scalar *a; 976 977 if (!rank) { 978 ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 979 } else { 980 ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 981 } 982 PLogObjectParent(mat,A); 983 984 /* copy over the A part */ 985 Aloc = (Mat_SeqBAIJ*) baij->A->data; 986 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 987 rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals); 988 989 for ( i=0; i<mbs; i++ ) { 990 rvals[0] = bs*(baij->rstart + i); 991 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 992 for ( j=ai[i]; j<ai[i+1]; j++ ) { 993 col = (baij->cstart+aj[j])*bs; 994 for (k=0; k<bs; k++ ) { 995 ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 996 col++; a += bs; 997 } 998 } 999 } 1000 /* copy over the B part */ 1001 Aloc = (Mat_SeqBAIJ*) baij->B->data; 1002 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1003 for ( i=0; i<mbs; i++ ) { 1004 rvals[0] = bs*(baij->rstart + i); 1005 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 1006 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1007 col = baij->garray[aj[j]]*bs; 1008 for (k=0; k<bs; k++ ) { 1009 ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1010 col++; a += bs; 1011 } 1012 } 1013 } 1014 PetscFree(rvals); 1015 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1016 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1017 /* 1018 Everyone has to call to draw the matrix since the graphics waits are 1019 synchronized across all processors that share the Draw object 1020 */ 1021 if (!rank || PetscTypeCompare(vtype,DRAW_VIEWER)) { 1022 ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 1023 } 1024 ierr = MatDestroy(A); CHKERRQ(ierr); 1025 } 1026 PetscFunctionReturn(0); 1027 } 1028 1029 1030 1031 #undef __FUNC__ 1032 #define __FUNC__ "MatView_MPIBAIJ" 1033 int MatView_MPIBAIJ(Mat mat,Viewer viewer) 1034 { 1035 int ierr; 1036 ViewerType vtype; 1037 1038 PetscFunctionBegin; 1039 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 1040 if (PetscTypeCompare(vtype,ASCII_VIEWER) || PetscTypeCompare(vtype,DRAW_VIEWER) || 1041 PetscTypeCompare(vtype,SOCKET_VIEWER)) { 1042 ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer); CHKERRQ(ierr); 1043 } else if (PetscTypeCompare(vtype,BINARY_VIEWER)) { 1044 ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1045 } else { 1046 SETERRQ(1,1,"Viewer type not supported by PETSc object"); 1047 } 1048 PetscFunctionReturn(0); 1049 } 1050 1051 #undef __FUNC__ 1052 #define __FUNC__ "MatDestroy_MPIBAIJ" 1053 int MatDestroy_MPIBAIJ(Mat mat) 1054 { 1055 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1056 int ierr; 1057 1058 PetscFunctionBegin; 1059 if (--mat->refct > 0) PetscFunctionReturn(0); 1060 1061 if (mat->mapping) { 1062 ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr); 1063 } 1064 if (mat->bmapping) { 1065 ierr = ISLocalToGlobalMappingDestroy(mat->bmapping); CHKERRQ(ierr); 1066 } 1067 if (mat->rmap) { 1068 ierr = MapDestroy(mat->rmap);CHKERRQ(ierr); 1069 } 1070 if (mat->cmap) { 1071 ierr = MapDestroy(mat->cmap);CHKERRQ(ierr); 1072 } 1073 #if defined(USE_PETSC_LOG) 1074 PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",baij->M,baij->N); 1075 #endif 1076 1077 ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr); 1078 ierr = StashDestroy_Private(&baij->bstash); CHKERRQ(ierr); 1079 PetscFree(baij->rowners); 1080 ierr = MatDestroy(baij->A); CHKERRQ(ierr); 1081 ierr = MatDestroy(baij->B); CHKERRQ(ierr); 1082 #if defined (USE_CTABLE) 1083 if (baij->colmap) TableDelete(baij->colmap); 1084 #else 1085 if (baij->colmap) PetscFree(baij->colmap); 1086 #endif 1087 if (baij->garray) PetscFree(baij->garray); 1088 if (baij->lvec) VecDestroy(baij->lvec); 1089 if (baij->Mvctx) VecScatterDestroy(baij->Mvctx); 1090 if (baij->rowvalues) PetscFree(baij->rowvalues); 1091 if (baij->barray) PetscFree(baij->barray); 1092 if (baij->hd) PetscFree(baij->hd); 1093 PetscFree(baij); 1094 PLogObjectDestroy(mat); 1095 PetscHeaderDestroy(mat); 1096 PetscFunctionReturn(0); 1097 } 1098 1099 #undef __FUNC__ 1100 #define __FUNC__ "MatMult_MPIBAIJ" 1101 int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1102 { 1103 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1104 int ierr, nt; 1105 1106 PetscFunctionBegin; 1107 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1108 if (nt != a->n) { 1109 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx"); 1110 } 1111 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1112 if (nt != a->m) { 1113 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible parition of A and yy"); 1114 } 1115 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1116 ierr = (*a->A->ops->mult)(a->A,xx,yy); CHKERRQ(ierr); 1117 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1118 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 1119 ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1120 PetscFunctionReturn(0); 1121 } 1122 1123 #undef __FUNC__ 1124 #define __FUNC__ "MatMultAdd_MPIBAIJ" 1125 int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1126 { 1127 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1128 int ierr; 1129 1130 PetscFunctionBegin; 1131 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1132 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 1133 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1134 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 1135 PetscFunctionReturn(0); 1136 } 1137 1138 #undef __FUNC__ 1139 #define __FUNC__ "MatMultTrans_MPIBAIJ" 1140 int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy) 1141 { 1142 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1143 int ierr; 1144 1145 PetscFunctionBegin; 1146 /* do nondiagonal part */ 1147 ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 1148 /* send it on its way */ 1149 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1150 /* do local part */ 1151 ierr = (*a->A->ops->multtrans)(a->A,xx,yy); CHKERRQ(ierr); 1152 /* receive remote parts: note this assumes the values are not actually */ 1153 /* inserted in yy until the next line, which is true for my implementation*/ 1154 /* but is not perhaps always true. */ 1155 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1156 PetscFunctionReturn(0); 1157 } 1158 1159 #undef __FUNC__ 1160 #define __FUNC__ "MatMultTransAdd_MPIBAIJ" 1161 int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1162 { 1163 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1164 int ierr; 1165 1166 PetscFunctionBegin; 1167 /* do nondiagonal part */ 1168 ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 1169 /* send it on its way */ 1170 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 1171 /* do local part */ 1172 ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 1173 /* receive remote parts: note this assumes the values are not actually */ 1174 /* inserted in yy until the next line, which is true for my implementation*/ 1175 /* but is not perhaps always true. */ 1176 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 1177 PetscFunctionReturn(0); 1178 } 1179 1180 /* 1181 This only works correctly for square matrices where the subblock A->A is the 1182 diagonal block 1183 */ 1184 #undef __FUNC__ 1185 #define __FUNC__ "MatGetDiagonal_MPIBAIJ" 1186 int MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1187 { 1188 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1189 int ierr; 1190 1191 PetscFunctionBegin; 1192 if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block"); 1193 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1194 PetscFunctionReturn(0); 1195 } 1196 1197 #undef __FUNC__ 1198 #define __FUNC__ "MatScale_MPIBAIJ" 1199 int MatScale_MPIBAIJ(Scalar *aa,Mat A) 1200 { 1201 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1202 int ierr; 1203 1204 PetscFunctionBegin; 1205 ierr = MatScale(aa,a->A); CHKERRQ(ierr); 1206 ierr = MatScale(aa,a->B); CHKERRQ(ierr); 1207 PetscFunctionReturn(0); 1208 } 1209 1210 #undef __FUNC__ 1211 #define __FUNC__ "MatGetSize_MPIBAIJ" 1212 int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n) 1213 { 1214 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1215 1216 PetscFunctionBegin; 1217 if (m) *m = mat->M; 1218 if (n) *n = mat->N; 1219 PetscFunctionReturn(0); 1220 } 1221 1222 #undef __FUNC__ 1223 #define __FUNC__ "MatGetLocalSize_MPIBAIJ" 1224 int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n) 1225 { 1226 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1227 1228 PetscFunctionBegin; 1229 *m = mat->m; *n = mat->n; 1230 PetscFunctionReturn(0); 1231 } 1232 1233 #undef __FUNC__ 1234 #define __FUNC__ "MatGetOwnershipRange_MPIBAIJ" 1235 int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n) 1236 { 1237 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1238 1239 PetscFunctionBegin; 1240 *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs; 1241 PetscFunctionReturn(0); 1242 } 1243 1244 #undef __FUNC__ 1245 #define __FUNC__ "MatGetRow_MPIBAIJ" 1246 int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 1247 { 1248 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1249 Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 1250 int bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB; 1251 int nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs; 1252 int *cmap, *idx_p,cstart = mat->cstart; 1253 1254 PetscFunctionBegin; 1255 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active"); 1256 mat->getrowactive = PETSC_TRUE; 1257 1258 if (!mat->rowvalues && (idx || v)) { 1259 /* 1260 allocate enough space to hold information from the longest row. 1261 */ 1262 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data; 1263 int max = 1,mbs = mat->mbs,tmp; 1264 for ( i=0; i<mbs; i++ ) { 1265 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1266 if (max < tmp) { max = tmp; } 1267 } 1268 mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar))); 1269 CHKPTRQ(mat->rowvalues); 1270 mat->rowindices = (int *) (mat->rowvalues + max*bs2); 1271 } 1272 1273 if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,0,"Only local rows") 1274 lrow = row - brstart; 1275 1276 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1277 if (!v) {pvA = 0; pvB = 0;} 1278 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1279 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1280 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1281 nztot = nzA + nzB; 1282 1283 cmap = mat->garray; 1284 if (v || idx) { 1285 if (nztot) { 1286 /* Sort by increasing column numbers, assuming A and B already sorted */ 1287 int imark = -1; 1288 if (v) { 1289 *v = v_p = mat->rowvalues; 1290 for ( i=0; i<nzB; i++ ) { 1291 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1292 else break; 1293 } 1294 imark = i; 1295 for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i]; 1296 for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = vworkB[i]; 1297 } 1298 if (idx) { 1299 *idx = idx_p = mat->rowindices; 1300 if (imark > -1) { 1301 for ( i=0; i<imark; i++ ) { 1302 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1303 } 1304 } else { 1305 for ( i=0; i<nzB; i++ ) { 1306 if (cmap[cworkB[i]/bs] < cstart) 1307 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1308 else break; 1309 } 1310 imark = i; 1311 } 1312 for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart*bs + cworkA[i]; 1313 for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1314 } 1315 } else { 1316 if (idx) *idx = 0; 1317 if (v) *v = 0; 1318 } 1319 } 1320 *nz = nztot; 1321 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1322 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1323 PetscFunctionReturn(0); 1324 } 1325 1326 #undef __FUNC__ 1327 #define __FUNC__ "MatRestoreRow_MPIBAIJ" 1328 int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1329 { 1330 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1331 1332 PetscFunctionBegin; 1333 if (baij->getrowactive == PETSC_FALSE) { 1334 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called"); 1335 } 1336 baij->getrowactive = PETSC_FALSE; 1337 PetscFunctionReturn(0); 1338 } 1339 1340 #undef __FUNC__ 1341 #define __FUNC__ "MatGetBlockSize_MPIBAIJ" 1342 int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs) 1343 { 1344 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1345 1346 PetscFunctionBegin; 1347 *bs = baij->bs; 1348 PetscFunctionReturn(0); 1349 } 1350 1351 #undef __FUNC__ 1352 #define __FUNC__ "MatZeroEntries_MPIBAIJ" 1353 int MatZeroEntries_MPIBAIJ(Mat A) 1354 { 1355 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 1356 int ierr; 1357 1358 PetscFunctionBegin; 1359 ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 1360 ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 1361 PetscFunctionReturn(0); 1362 } 1363 1364 #undef __FUNC__ 1365 #define __FUNC__ "MatGetInfo_MPIBAIJ" 1366 int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1367 { 1368 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) matin->data; 1369 Mat A = a->A, B = a->B; 1370 int ierr; 1371 double isend[5], irecv[5]; 1372 1373 PetscFunctionBegin; 1374 info->block_size = (double)a->bs; 1375 ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr); 1376 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1377 isend[3] = info->memory; isend[4] = info->mallocs; 1378 ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr); 1379 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1380 isend[3] += info->memory; isend[4] += info->mallocs; 1381 if (flag == MAT_LOCAL) { 1382 info->nz_used = isend[0]; 1383 info->nz_allocated = isend[1]; 1384 info->nz_unneeded = isend[2]; 1385 info->memory = isend[3]; 1386 info->mallocs = isend[4]; 1387 } else if (flag == MAT_GLOBAL_MAX) { 1388 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr); 1389 info->nz_used = irecv[0]; 1390 info->nz_allocated = irecv[1]; 1391 info->nz_unneeded = irecv[2]; 1392 info->memory = irecv[3]; 1393 info->mallocs = irecv[4]; 1394 } else if (flag == MAT_GLOBAL_SUM) { 1395 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr); 1396 info->nz_used = irecv[0]; 1397 info->nz_allocated = irecv[1]; 1398 info->nz_unneeded = irecv[2]; 1399 info->memory = irecv[3]; 1400 info->mallocs = irecv[4]; 1401 } else { 1402 SETERRQ1(1,1,"Unknown MatInfoType argument %d",flag); 1403 } 1404 info->rows_global = (double)a->M; 1405 info->columns_global = (double)a->N; 1406 info->rows_local = (double)a->m; 1407 info->columns_local = (double)a->N; 1408 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1409 info->fill_ratio_needed = 0; 1410 info->factor_mallocs = 0; 1411 PetscFunctionReturn(0); 1412 } 1413 1414 #undef __FUNC__ 1415 #define __FUNC__ "MatSetOption_MPIBAIJ" 1416 int MatSetOption_MPIBAIJ(Mat A,MatOption op) 1417 { 1418 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1419 int ierr; 1420 1421 PetscFunctionBegin; 1422 if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 1423 op == MAT_YES_NEW_NONZERO_LOCATIONS || 1424 op == MAT_COLUMNS_UNSORTED || 1425 op == MAT_COLUMNS_SORTED || 1426 op == MAT_NEW_NONZERO_ALLOCATION_ERR || 1427 op == MAT_NEW_NONZERO_LOCATION_ERR) { 1428 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1429 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1430 } else if (op == MAT_ROW_ORIENTED) { 1431 a->roworiented = 1; 1432 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1433 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1434 } else if (op == MAT_ROWS_SORTED || 1435 op == MAT_ROWS_UNSORTED || 1436 op == MAT_SYMMETRIC || 1437 op == MAT_STRUCTURALLY_SYMMETRIC || 1438 op == MAT_YES_NEW_DIAGONALS || 1439 op == MAT_USE_HASH_TABLE) { 1440 PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 1441 } else if (op == MAT_COLUMN_ORIENTED) { 1442 a->roworiented = 0; 1443 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1444 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1445 } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 1446 a->donotstash = 1; 1447 } else if (op == MAT_NO_NEW_DIAGONALS) { 1448 SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 1449 } else if (op == MAT_USE_HASH_TABLE) { 1450 a->ht_flag = 1; 1451 } else { 1452 SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 1453 } 1454 PetscFunctionReturn(0); 1455 } 1456 1457 #undef __FUNC__ 1458 #define __FUNC__ "MatTranspose_MPIBAIJ(" 1459 int MatTranspose_MPIBAIJ(Mat A,Mat *matout) 1460 { 1461 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data; 1462 Mat_SeqBAIJ *Aloc; 1463 Mat B; 1464 int ierr,M=baij->M,N=baij->N,*ai,*aj,i,*rvals,j,k,col; 1465 int bs=baij->bs,mbs=baij->mbs; 1466 Scalar *a; 1467 1468 PetscFunctionBegin; 1469 if (matout == PETSC_NULL && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place"); 1470 ierr = MatCreateMPIBAIJ(A->comm,baij->bs,baij->n,baij->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B); 1471 CHKERRQ(ierr); 1472 1473 /* copy over the A part */ 1474 Aloc = (Mat_SeqBAIJ*) baij->A->data; 1475 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1476 rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals); 1477 1478 for ( i=0; i<mbs; i++ ) { 1479 rvals[0] = bs*(baij->rstart + i); 1480 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 1481 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1482 col = (baij->cstart+aj[j])*bs; 1483 for (k=0; k<bs; k++ ) { 1484 ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1485 col++; a += bs; 1486 } 1487 } 1488 } 1489 /* copy over the B part */ 1490 Aloc = (Mat_SeqBAIJ*) baij->B->data; 1491 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1492 for ( i=0; i<mbs; i++ ) { 1493 rvals[0] = bs*(baij->rstart + i); 1494 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 1495 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1496 col = baij->garray[aj[j]]*bs; 1497 for (k=0; k<bs; k++ ) { 1498 ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1499 col++; a += bs; 1500 } 1501 } 1502 } 1503 PetscFree(rvals); 1504 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1505 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1506 1507 if (matout != PETSC_NULL) { 1508 *matout = B; 1509 } else { 1510 PetscOps *Abops; 1511 MatOps Aops; 1512 1513 /* This isn't really an in-place transpose .... but free data structures from baij */ 1514 PetscFree(baij->rowners); 1515 ierr = MatDestroy(baij->A); CHKERRQ(ierr); 1516 ierr = MatDestroy(baij->B); CHKERRQ(ierr); 1517 #if defined (USE_CTABLE) 1518 if (baij->colmap) TableDelete(baij->colmap); 1519 #else 1520 if (baij->colmap) PetscFree(baij->colmap); 1521 #endif 1522 if (baij->garray) PetscFree(baij->garray); 1523 if (baij->lvec) VecDestroy(baij->lvec); 1524 if (baij->Mvctx) VecScatterDestroy(baij->Mvctx); 1525 PetscFree(baij); 1526 1527 /* 1528 This is horrible, horrible code. We need to keep the 1529 A pointers for the bops and ops but copy everything 1530 else from C. 1531 */ 1532 Abops = A->bops; 1533 Aops = A->ops; 1534 PetscMemcpy(A,B,sizeof(struct _p_Mat)); 1535 A->bops = Abops; 1536 A->ops = Aops; 1537 1538 PetscHeaderDestroy(B); 1539 } 1540 PetscFunctionReturn(0); 1541 } 1542 1543 #undef __FUNC__ 1544 #define __FUNC__ "MatDiagonalScale_MPIBAIJ" 1545 int MatDiagonalScale_MPIBAIJ(Mat A,Vec ll,Vec rr) 1546 { 1547 Mat a = ((Mat_MPIBAIJ *) A->data)->A; 1548 Mat b = ((Mat_MPIBAIJ *) A->data)->B; 1549 int ierr,s1,s2,s3; 1550 1551 PetscFunctionBegin; 1552 if (ll) { 1553 ierr = VecGetLocalSize(ll,&s1); CHKERRQ(ierr); 1554 ierr = MatGetLocalSize(A,&s2,&s3); CHKERRQ(ierr); 1555 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"non-conforming local sizes"); 1556 ierr = MatDiagonalScale(a,ll,0); CHKERRQ(ierr); 1557 ierr = MatDiagonalScale(b,ll,0); CHKERRQ(ierr); 1558 } 1559 if (rr) SETERRQ(PETSC_ERR_SUP,0,"not supported for right vector"); 1560 PetscFunctionReturn(0); 1561 } 1562 1563 #undef __FUNC__ 1564 #define __FUNC__ "MatZeroRows_MPIBAIJ" 1565 int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag) 1566 { 1567 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 1568 int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 1569 int *procs,*nprocs,j,found,idx,nsends,*work,row; 1570 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 1571 int *rvalues,tag = A->tag,count,base,slen,n,*source; 1572 int *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs; 1573 MPI_Comm comm = A->comm; 1574 MPI_Request *send_waits,*recv_waits; 1575 MPI_Status recv_status,*send_status; 1576 IS istmp; 1577 1578 PetscFunctionBegin; 1579 ierr = ISGetSize(is,&N); CHKERRQ(ierr); 1580 ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 1581 1582 /* first count number of contributors to each processor */ 1583 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 1584 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 1585 owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 1586 for ( i=0; i<N; i++ ) { 1587 idx = rows[i]; 1588 found = 0; 1589 for ( j=0; j<size; j++ ) { 1590 if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 1591 nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 1592 } 1593 } 1594 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range"); 1595 } 1596 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 1597 1598 /* inform other processors of number of messages and max length*/ 1599 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 1600 ierr = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 1601 nrecvs = work[rank]; 1602 ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 1603 nmax = work[rank]; 1604 PetscFree(work); 1605 1606 /* post receives: */ 1607 rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); CHKPTRQ(rvalues); 1608 recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 1609 for ( i=0; i<nrecvs; i++ ) { 1610 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 1611 } 1612 1613 /* do sends: 1614 1) starts[i] gives the starting index in svalues for stuff going to 1615 the ith processor 1616 */ 1617 svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 1618 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 1619 starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 1620 starts[0] = 0; 1621 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 1622 for ( i=0; i<N; i++ ) { 1623 svalues[starts[owner[i]]++] = rows[i]; 1624 } 1625 ISRestoreIndices(is,&rows); 1626 1627 starts[0] = 0; 1628 for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 1629 count = 0; 1630 for ( i=0; i<size; i++ ) { 1631 if (procs[i]) { 1632 ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1633 } 1634 } 1635 PetscFree(starts); 1636 1637 base = owners[rank]*bs; 1638 1639 /* wait on receives */ 1640 lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 1641 source = lens + nrecvs; 1642 count = nrecvs; slen = 0; 1643 while (count) { 1644 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1645 /* unpack receives into our local space */ 1646 ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 1647 source[imdex] = recv_status.MPI_SOURCE; 1648 lens[imdex] = n; 1649 slen += n; 1650 count--; 1651 } 1652 PetscFree(recv_waits); 1653 1654 /* move the data into the send scatter */ 1655 lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 1656 count = 0; 1657 for ( i=0; i<nrecvs; i++ ) { 1658 values = rvalues + i*nmax; 1659 for ( j=0; j<lens[i]; j++ ) { 1660 lrows[count++] = values[j] - base; 1661 } 1662 } 1663 PetscFree(rvalues); PetscFree(lens); 1664 PetscFree(owner); PetscFree(nprocs); 1665 1666 /* actually zap the local rows */ 1667 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 1668 PLogObjectParent(A,istmp); 1669 1670 /* 1671 Zero the required rows. If the "diagonal block" of the matrix 1672 is square and the user wishes to set the diagonal we use seperate 1673 code so that MatSetValues() is not called for each diagonal allocating 1674 new memory, thus calling lots of mallocs and slowing things down. 1675 1676 Contributed by: Mathew Knepley 1677 */ 1678 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1679 ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 1680 if (diag && (l->A->M == l->A->N)) { 1681 ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 1682 } else if (diag) { 1683 ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr); 1684 if (((Mat_SeqBAIJ*)l->A->data)->nonew) { 1685 SETERRQ(PETSC_ERR_SUP,0,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1686 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1687 } 1688 for ( i = 0; i < slen; i++ ) { 1689 row = lrows[i] + rstart_bs; 1690 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 1691 } 1692 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1693 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1694 } else { 1695 ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr); 1696 } 1697 1698 ierr = ISDestroy(istmp); CHKERRQ(ierr); 1699 PetscFree(lrows); 1700 1701 /* wait on sends */ 1702 if (nsends) { 1703 send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 1704 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1705 PetscFree(send_status); 1706 } 1707 PetscFree(send_waits); PetscFree(svalues); 1708 1709 PetscFunctionReturn(0); 1710 } 1711 1712 #undef __FUNC__ 1713 #define __FUNC__ "MatPrintHelp_MPIBAIJ" 1714 int MatPrintHelp_MPIBAIJ(Mat A) 1715 { 1716 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 1717 MPI_Comm comm = A->comm; 1718 static int called = 0; 1719 int ierr; 1720 1721 PetscFunctionBegin; 1722 if (!a->rank) { 1723 ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr); 1724 } 1725 if (called) {PetscFunctionReturn(0);} else called = 1; 1726 (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n"); 1727 (*PetscHelpPrintf)(comm," -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n"); 1728 PetscFunctionReturn(0); 1729 } 1730 1731 #undef __FUNC__ 1732 #define __FUNC__ "MatSetUnfactored_MPIBAIJ" 1733 int MatSetUnfactored_MPIBAIJ(Mat A) 1734 { 1735 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 1736 int ierr; 1737 1738 PetscFunctionBegin; 1739 ierr = MatSetUnfactored(a->A); CHKERRQ(ierr); 1740 PetscFunctionReturn(0); 1741 } 1742 1743 static int MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *); 1744 1745 /* -------------------------------------------------------------------*/ 1746 static struct _MatOps MatOps_Values = { 1747 MatSetValues_MPIBAIJ, 1748 MatGetRow_MPIBAIJ, 1749 MatRestoreRow_MPIBAIJ, 1750 MatMult_MPIBAIJ, 1751 MatMultAdd_MPIBAIJ, 1752 MatMultTrans_MPIBAIJ, 1753 MatMultTransAdd_MPIBAIJ, 1754 0, 1755 0, 1756 0, 1757 0, 1758 0, 1759 0, 1760 0, 1761 MatTranspose_MPIBAIJ, 1762 MatGetInfo_MPIBAIJ, 1763 0, 1764 MatGetDiagonal_MPIBAIJ, 1765 MatDiagonalScale_MPIBAIJ, 1766 MatNorm_MPIBAIJ, 1767 MatAssemblyBegin_MPIBAIJ, 1768 MatAssemblyEnd_MPIBAIJ, 1769 0, 1770 MatSetOption_MPIBAIJ, 1771 MatZeroEntries_MPIBAIJ, 1772 MatZeroRows_MPIBAIJ, 1773 0, 1774 0, 1775 0, 1776 0, 1777 MatGetSize_MPIBAIJ, 1778 MatGetLocalSize_MPIBAIJ, 1779 MatGetOwnershipRange_MPIBAIJ, 1780 0, 1781 0, 1782 0, 1783 0, 1784 MatDuplicate_MPIBAIJ, 1785 0, 1786 0, 1787 0, 1788 0, 1789 0, 1790 MatGetSubMatrices_MPIBAIJ, 1791 MatIncreaseOverlap_MPIBAIJ, 1792 MatGetValues_MPIBAIJ, 1793 0, 1794 MatPrintHelp_MPIBAIJ, 1795 MatScale_MPIBAIJ, 1796 0, 1797 0, 1798 0, 1799 MatGetBlockSize_MPIBAIJ, 1800 0, 1801 0, 1802 0, 1803 0, 1804 0, 1805 0, 1806 MatSetUnfactored_MPIBAIJ, 1807 0, 1808 MatSetValuesBlocked_MPIBAIJ, 1809 0, 1810 0, 1811 0, 1812 MatGetMaps_Petsc}; 1813 1814 1815 EXTERN_C_BEGIN 1816 #undef __FUNC__ 1817 #define __FUNC__ "MatGetDiagonalBlock_MPIBAIJ" 1818 int MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 1819 { 1820 PetscFunctionBegin; 1821 *a = ((Mat_MPIBAIJ *)A->data)->A; 1822 *iscopy = PETSC_FALSE; 1823 PetscFunctionReturn(0); 1824 } 1825 EXTERN_C_END 1826 1827 #undef __FUNC__ 1828 #define __FUNC__ "MatCreateMPIBAIJ" 1829 /*@C 1830 MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 1831 (block compressed row). For good matrix assembly performance 1832 the user should preallocate the matrix storage by setting the parameters 1833 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1834 performance can be increased by more than a factor of 50. 1835 1836 Collective on MPI_Comm 1837 1838 Input Parameters: 1839 + comm - MPI communicator 1840 . bs - size of blockk 1841 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1842 This value should be the same as the local size used in creating the 1843 y vector for the matrix-vector product y = Ax. 1844 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1845 This value should be the same as the local size used in creating the 1846 x vector for the matrix-vector product y = Ax. 1847 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 1848 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 1849 . d_nz - number of block nonzeros per block row in diagonal portion of local 1850 submatrix (same for all local rows) 1851 . d_nzz - array containing the number of block nonzeros in the various block rows 1852 of the in diagonal portion of the local (possibly different for each block 1853 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 1854 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1855 submatrix (same for all local rows). 1856 - o_nzz - array containing the number of nonzeros in the various block rows of the 1857 off-diagonal portion of the local submatrix (possibly different for 1858 each block row) or PETSC_NULL. 1859 1860 Output Parameter: 1861 . A - the matrix 1862 1863 Options Database Keys: 1864 . -mat_no_unroll - uses code that does not unroll the loops in the 1865 block calculations (much slower) 1866 . -mat_block_size - size of the blocks to use 1867 . -mat_mpi - use the parallel matrix data structures even on one processor 1868 (defaults to using SeqBAIJ format on one processor) 1869 1870 Notes: 1871 The user MUST specify either the local or global matrix dimensions 1872 (possibly both). 1873 1874 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1875 than it must be used on all processors that share the object for that argument. 1876 1877 Storage Information: 1878 For a square global matrix we define each processor's diagonal portion 1879 to be its local rows and the corresponding columns (a square submatrix); 1880 each processor's off-diagonal portion encompasses the remainder of the 1881 local matrix (a rectangular submatrix). 1882 1883 The user can specify preallocated storage for the diagonal part of 1884 the local submatrix with either d_nz or d_nnz (not both). Set 1885 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1886 memory allocation. Likewise, specify preallocated storage for the 1887 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1888 1889 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1890 the figure below we depict these three local rows and all columns (0-11). 1891 1892 .vb 1893 0 1 2 3 4 5 6 7 8 9 10 11 1894 ------------------- 1895 row 3 | o o o d d d o o o o o o 1896 row 4 | o o o d d d o o o o o o 1897 row 5 | o o o d d d o o o o o o 1898 ------------------- 1899 .ve 1900 1901 Thus, any entries in the d locations are stored in the d (diagonal) 1902 submatrix, and any entries in the o locations are stored in the 1903 o (off-diagonal) submatrix. Note that the d and the o submatrices are 1904 stored simply in the MATSEQBAIJ format for compressed row storage. 1905 1906 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 1907 and o_nz should indicate the number of block nonzeros per row in the o matrix. 1908 In general, for PDE problems in which most nonzeros are near the diagonal, 1909 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1910 or you will get TERRIBLE performance; see the users' manual chapter on 1911 matrices. 1912 1913 Level: intermediate 1914 1915 .keywords: matrix, block, aij, compressed row, sparse, parallel 1916 1917 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIAIJ() 1918 @*/ 1919 int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N, 1920 int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 1921 { 1922 Mat B; 1923 Mat_MPIBAIJ *b; 1924 int ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size,flg; 1925 int flag1 = 0,flag2 = 0; 1926 1927 PetscFunctionBegin; 1928 if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid block size specified, must be positive"); 1929 1930 MPI_Comm_size(comm,&size); 1931 ierr = OptionsHasName(PETSC_NULL,"-mat_mpibaij",&flag1); CHKERRQ(ierr); 1932 ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2); CHKERRQ(ierr); 1933 if (!flag1 && !flag2 && size == 1) { 1934 if (M == PETSC_DECIDE) M = m; 1935 if (N == PETSC_DECIDE) N = n; 1936 ierr = MatCreateSeqBAIJ(comm,bs,M,N,d_nz,d_nnz,A); CHKERRQ(ierr); 1937 PetscFunctionReturn(0); 1938 } 1939 1940 *A = 0; 1941 PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",comm,MatDestroy,MatView); 1942 PLogObjectCreate(B); 1943 B->data = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b); 1944 PetscMemzero(b,sizeof(Mat_MPIBAIJ)); 1945 PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps)); 1946 1947 B->ops->destroy = MatDestroy_MPIBAIJ; 1948 B->ops->view = MatView_MPIBAIJ; 1949 B->mapping = 0; 1950 B->factor = 0; 1951 B->assembled = PETSC_FALSE; 1952 1953 B->insertmode = NOT_SET_VALUES; 1954 MPI_Comm_rank(comm,&b->rank); 1955 MPI_Comm_size(comm,&b->size); 1956 1957 if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) { 1958 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 1959 } 1960 if ( M == PETSC_DECIDE && m == PETSC_DECIDE) { 1961 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either M or m should be specified"); 1962 } 1963 if ( N == PETSC_DECIDE && n == PETSC_DECIDE) { 1964 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either N or n should be specified"); 1965 } 1966 if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE; 1967 if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE; 1968 1969 if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 1970 work[0] = m; work[1] = n; 1971 mbs = m/bs; nbs = n/bs; 1972 ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr); 1973 if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;} 1974 if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;} 1975 } 1976 if (m == PETSC_DECIDE) { 1977 Mbs = M/bs; 1978 if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global rows must be divisible by blocksize"); 1979 mbs = Mbs/b->size + ((Mbs % b->size) > b->rank); 1980 m = mbs*bs; 1981 } 1982 if (n == PETSC_DECIDE) { 1983 Nbs = N/bs; 1984 if (Nbs*bs != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global cols must be divisible by blocksize"); 1985 nbs = Nbs/b->size + ((Nbs % b->size) > b->rank); 1986 n = nbs*bs; 1987 } 1988 if (mbs*bs != m || nbs*bs != n) { 1989 SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of local rows, cols must be divisible by blocksize"); 1990 } 1991 1992 b->m = m; B->m = m; 1993 b->n = n; B->n = n; 1994 b->N = N; B->N = N; 1995 b->M = M; B->M = M; 1996 b->bs = bs; 1997 b->bs2 = bs*bs; 1998 b->mbs = mbs; 1999 b->nbs = nbs; 2000 b->Mbs = Mbs; 2001 b->Nbs = Nbs; 2002 2003 /* the information in the maps duplicates the information computed below, eventually 2004 we should remove the duplicate information that is not contained in the maps */ 2005 ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr); 2006 ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr); 2007 2008 /* build local table of row and column ownerships */ 2009 b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners); 2010 PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 2011 b->cowners = b->rowners + b->size + 2; 2012 ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2013 b->rowners[0] = 0; 2014 for ( i=2; i<=b->size; i++ ) { 2015 b->rowners[i] += b->rowners[i-1]; 2016 } 2017 b->rstart = b->rowners[b->rank]; 2018 b->rend = b->rowners[b->rank+1]; 2019 b->rstart_bs = b->rstart * bs; 2020 b->rend_bs = b->rend * bs; 2021 2022 ierr = MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2023 b->cowners[0] = 0; 2024 for ( i=2; i<=b->size; i++ ) { 2025 b->cowners[i] += b->cowners[i-1]; 2026 } 2027 b->cstart = b->cowners[b->rank]; 2028 b->cend = b->cowners[b->rank+1]; 2029 b->cstart_bs = b->cstart * bs; 2030 b->cend_bs = b->cend * bs; 2031 2032 2033 if (d_nz == PETSC_DEFAULT) d_nz = 5; 2034 ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr); 2035 PLogObjectParent(B,b->A); 2036 if (o_nz == PETSC_DEFAULT) o_nz = 0; 2037 ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr); 2038 PLogObjectParent(B,b->B); 2039 2040 /* build cache for off array entries formed */ 2041 ierr = StashCreate_Private(comm,1,bs,&b->stash); CHKERRQ(ierr); 2042 ierr = StashCreate_Private(comm,bs,bs,&b->bstash); CHKERRQ(ierr); 2043 b->donotstash = 0; 2044 b->colmap = 0; 2045 b->garray = 0; 2046 b->roworiented = 1; 2047 2048 /* stuff used in block assembly */ 2049 b->barray = 0; 2050 2051 /* stuff used for matrix vector multiply */ 2052 b->lvec = 0; 2053 b->Mvctx = 0; 2054 2055 /* stuff for MatGetRow() */ 2056 b->rowindices = 0; 2057 b->rowvalues = 0; 2058 b->getrowactive = PETSC_FALSE; 2059 2060 /* hash table stuff */ 2061 b->ht = 0; 2062 b->hd = 0; 2063 b->ht_size = 0; 2064 b->ht_flag = 0; 2065 b->ht_fact = 0; 2066 b->ht_total_ct = 0; 2067 b->ht_insert_ct = 0; 2068 2069 *A = B; 2070 ierr = OptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg); CHKERRQ(ierr); 2071 if (flg) { 2072 double fact = 1.39; 2073 ierr = MatSetOption(B,MAT_USE_HASH_TABLE); CHKERRQ(ierr); 2074 ierr = OptionsGetDouble(PETSC_NULL,"-mat_use_hash_table",&fact,&flg); CHKERRQ(ierr); 2075 if (fact <= 1.0) fact = 1.39; 2076 ierr = MatMPIBAIJSetHashTableFactor(B,fact); CHKERRQ(ierr); 2077 PLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact); 2078 } 2079 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C", 2080 "MatGetDiagonalBlock_MPIBAIJ", 2081 (void*)MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 2082 PetscFunctionReturn(0); 2083 } 2084 2085 #undef __FUNC__ 2086 #define __FUNC__ "MatDuplicate_MPIBAIJ" 2087 static int MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2088 { 2089 Mat mat; 2090 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data; 2091 int ierr, len=0, flg; 2092 2093 PetscFunctionBegin; 2094 *newmat = 0; 2095 PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",matin->comm,MatDestroy,MatView); 2096 PLogObjectCreate(mat); 2097 mat->data = (void *) (a = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(a); 2098 PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps)); 2099 mat->ops->destroy = MatDestroy_MPIBAIJ; 2100 mat->ops->view = MatView_MPIBAIJ; 2101 mat->factor = matin->factor; 2102 mat->assembled = PETSC_TRUE; 2103 mat->insertmode = NOT_SET_VALUES; 2104 2105 a->m = mat->m = oldmat->m; 2106 a->n = mat->n = oldmat->n; 2107 a->M = mat->M = oldmat->M; 2108 a->N = mat->N = oldmat->N; 2109 2110 a->bs = oldmat->bs; 2111 a->bs2 = oldmat->bs2; 2112 a->mbs = oldmat->mbs; 2113 a->nbs = oldmat->nbs; 2114 a->Mbs = oldmat->Mbs; 2115 a->Nbs = oldmat->Nbs; 2116 2117 a->rstart = oldmat->rstart; 2118 a->rend = oldmat->rend; 2119 a->cstart = oldmat->cstart; 2120 a->cend = oldmat->cend; 2121 a->size = oldmat->size; 2122 a->rank = oldmat->rank; 2123 a->donotstash = oldmat->donotstash; 2124 a->roworiented = oldmat->roworiented; 2125 a->rowindices = 0; 2126 a->rowvalues = 0; 2127 a->getrowactive = PETSC_FALSE; 2128 a->barray = 0; 2129 a->rstart_bs = oldmat->rstart_bs; 2130 a->rend_bs = oldmat->rend_bs; 2131 a->cstart_bs = oldmat->cstart_bs; 2132 a->cend_bs = oldmat->cend_bs; 2133 2134 /* hash table stuff */ 2135 a->ht = 0; 2136 a->hd = 0; 2137 a->ht_size = 0; 2138 a->ht_flag = oldmat->ht_flag; 2139 a->ht_fact = oldmat->ht_fact; 2140 a->ht_total_ct = 0; 2141 a->ht_insert_ct = 0; 2142 2143 2144 a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 2145 PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 2146 a->cowners = a->rowners + a->size + 2; 2147 PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int)); 2148 ierr = StashCreate_Private(matin->comm,1,oldmat->bs,&a->stash); CHKERRQ(ierr); 2149 ierr = StashCreate_Private(matin->comm,oldmat->bs,oldmat->bs,&a->bstash); CHKERRQ(ierr); 2150 if (oldmat->colmap) { 2151 #if defined (USE_CTABLE) 2152 ierr = TableCreateCopy(oldmat->colmap,&a->colmap); CHKERRQ(ierr); 2153 #else 2154 a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap); 2155 PLogObjectMemory(mat,(a->Nbs)*sizeof(int)); 2156 PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int)); 2157 #endif 2158 } else a->colmap = 0; 2159 if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) { 2160 a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray); 2161 PLogObjectMemory(mat,len*sizeof(int)); 2162 PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); 2163 } else a->garray = 0; 2164 2165 ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 2166 PLogObjectParent(mat,a->lvec); 2167 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 2168 2169 PLogObjectParent(mat,a->Mvctx); 2170 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A); CHKERRQ(ierr); 2171 PLogObjectParent(mat,a->A); 2172 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B); CHKERRQ(ierr); 2173 PLogObjectParent(mat,a->B); 2174 ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr); 2175 ierr = FListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr); 2176 if (flg) { 2177 ierr = MatPrintHelp(mat); CHKERRQ(ierr); 2178 } 2179 *newmat = mat; 2180 PetscFunctionReturn(0); 2181 } 2182 2183 #include "sys.h" 2184 2185 #undef __FUNC__ 2186 #define __FUNC__ "MatLoad_MPIBAIJ" 2187 int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat) 2188 { 2189 Mat A; 2190 int i, nz, ierr, j,rstart, rend, fd; 2191 Scalar *vals,*buf; 2192 MPI_Comm comm = ((PetscObject)viewer)->comm; 2193 MPI_Status status; 2194 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 2195 int *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf; 2196 int flg,tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows; 2197 int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2198 int dcount,kmax,k,nzcount,tmp; 2199 2200 PetscFunctionBegin; 2201 ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr); 2202 2203 MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 2204 if (!rank) { 2205 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 2206 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr); 2207 if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object"); 2208 if (header[3] < 0) { 2209 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as MPIBAIJ"); 2210 } 2211 } 2212 2213 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 2214 M = header[1]; N = header[2]; 2215 2216 if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices"); 2217 2218 /* 2219 This code adds extra rows to make sure the number of rows is 2220 divisible by the blocksize 2221 */ 2222 Mbs = M/bs; 2223 extra_rows = bs - M + bs*(Mbs); 2224 if (extra_rows == bs) extra_rows = 0; 2225 else Mbs++; 2226 if (extra_rows &&!rank) { 2227 PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n"); 2228 } 2229 2230 /* determine ownership of all rows */ 2231 mbs = Mbs/size + ((Mbs % size) > rank); 2232 m = mbs * bs; 2233 rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int)); CHKPTRQ(rowners); 2234 browners = rowners + size + 1; 2235 ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2236 rowners[0] = 0; 2237 for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1]; 2238 for ( i=0; i<=size; i++ ) browners[i] = rowners[i]*bs; 2239 rstart = rowners[rank]; 2240 rend = rowners[rank+1]; 2241 2242 /* distribute row lengths to all processors */ 2243 locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) ); CHKPTRQ(locrowlens); 2244 if (!rank) { 2245 rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) ); CHKPTRQ(rowlengths); 2246 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr); 2247 for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 2248 sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 2249 for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i]; 2250 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2251 PetscFree(sndcounts); 2252 } else { 2253 ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm);CHKERRQ(ierr); 2254 } 2255 2256 if (!rank) { 2257 /* calculate the number of nonzeros on each processor */ 2258 procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 2259 PetscMemzero(procsnz,size*sizeof(int)); 2260 for ( i=0; i<size; i++ ) { 2261 for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) { 2262 procsnz[i] += rowlengths[j]; 2263 } 2264 } 2265 PetscFree(rowlengths); 2266 2267 /* determine max buffer needed and allocate it */ 2268 maxnz = 0; 2269 for ( i=0; i<size; i++ ) { 2270 maxnz = PetscMax(maxnz,procsnz[i]); 2271 } 2272 cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 2273 2274 /* read in my part of the matrix column indices */ 2275 nz = procsnz[0]; 2276 ibuf = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf); 2277 mycols = ibuf; 2278 if (size == 1) nz -= extra_rows; 2279 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr); 2280 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2281 2282 /* read in every ones (except the last) and ship off */ 2283 for ( i=1; i<size-1; i++ ) { 2284 nz = procsnz[i]; 2285 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr); 2286 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 2287 } 2288 /* read in the stuff for the last proc */ 2289 if ( size != 1 ) { 2290 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2291 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr); 2292 for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i; 2293 ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr); 2294 } 2295 PetscFree(cols); 2296 } else { 2297 /* determine buffer space needed for message */ 2298 nz = 0; 2299 for ( i=0; i<m; i++ ) { 2300 nz += locrowlens[i]; 2301 } 2302 ibuf = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf); 2303 mycols = ibuf; 2304 /* receive message of column indices*/ 2305 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2306 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2307 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 2308 } 2309 2310 /* loop over local rows, determining number of off diagonal entries */ 2311 dlens = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) ); CHKPTRQ(dlens); 2312 odlens = dlens + (rend-rstart); 2313 mask = (int *) PetscMalloc( 3*Mbs*sizeof(int) ); CHKPTRQ(mask); 2314 PetscMemzero(mask,3*Mbs*sizeof(int)); 2315 masked1 = mask + Mbs; 2316 masked2 = masked1 + Mbs; 2317 rowcount = 0; nzcount = 0; 2318 for ( i=0; i<mbs; i++ ) { 2319 dcount = 0; 2320 odcount = 0; 2321 for ( j=0; j<bs; j++ ) { 2322 kmax = locrowlens[rowcount]; 2323 for ( k=0; k<kmax; k++ ) { 2324 tmp = mycols[nzcount++]/bs; 2325 if (!mask[tmp]) { 2326 mask[tmp] = 1; 2327 if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp; 2328 else masked1[dcount++] = tmp; 2329 } 2330 } 2331 rowcount++; 2332 } 2333 2334 dlens[i] = dcount; 2335 odlens[i] = odcount; 2336 2337 /* zero out the mask elements we set */ 2338 for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0; 2339 for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0; 2340 } 2341 2342 /* create our matrix */ 2343 ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat); 2344 CHKERRQ(ierr); 2345 A = *newmat; 2346 MatSetOption(A,MAT_COLUMNS_SORTED); 2347 2348 if (!rank) { 2349 buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(buf); 2350 /* read in my part of the matrix numerical values */ 2351 nz = procsnz[0]; 2352 vals = buf; 2353 mycols = ibuf; 2354 if (size == 1) nz -= extra_rows; 2355 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 2356 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2357 2358 /* insert into matrix */ 2359 jj = rstart*bs; 2360 for ( i=0; i<m; i++ ) { 2361 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2362 mycols += locrowlens[i]; 2363 vals += locrowlens[i]; 2364 jj++; 2365 } 2366 /* read in other processors (except the last one) and ship out */ 2367 for ( i=1; i<size-1; i++ ) { 2368 nz = procsnz[i]; 2369 vals = buf; 2370 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 2371 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2372 } 2373 /* the last proc */ 2374 if ( size != 1 ){ 2375 nz = procsnz[i] - extra_rows; 2376 vals = buf; 2377 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 2378 for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0; 2379 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 2380 } 2381 PetscFree(procsnz); 2382 } else { 2383 /* receive numeric values */ 2384 buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(buf); 2385 2386 /* receive message of values*/ 2387 vals = buf; 2388 mycols = ibuf; 2389 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2390 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2391 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 2392 2393 /* insert into matrix */ 2394 jj = rstart*bs; 2395 for ( i=0; i<m; i++ ) { 2396 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2397 mycols += locrowlens[i]; 2398 vals += locrowlens[i]; 2399 jj++; 2400 } 2401 } 2402 PetscFree(locrowlens); 2403 PetscFree(buf); 2404 PetscFree(ibuf); 2405 PetscFree(rowners); 2406 PetscFree(dlens); 2407 PetscFree(mask); 2408 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 2409 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 2410 PetscFunctionReturn(0); 2411 } 2412 2413 2414 2415 #undef __FUNC__ 2416 #define __FUNC__ "MatMPIBAIJSetHashTableFactor" 2417 /*@ 2418 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2419 2420 Input Parameters: 2421 . mat - the matrix 2422 . fact - factor 2423 2424 Collective on Mat 2425 2426 Notes: 2427 This can also be set by the command line option: -mat_use_hash_table fact 2428 2429 .keywords: matrix, hashtable, factor, HT 2430 2431 .seealso: MatSetOption() 2432 @*/ 2433 int MatMPIBAIJSetHashTableFactor(Mat mat,double fact) 2434 { 2435 Mat_MPIBAIJ *baij; 2436 2437 PetscFunctionBegin; 2438 PetscValidHeaderSpecific(mat,MAT_COOKIE); 2439 if (mat->type != MATMPIBAIJ) { 2440 SETERRQ(PETSC_ERR_ARG_WRONG,1,"Incorrect matrix type. Use MPIBAIJ only."); 2441 } 2442 baij = (Mat_MPIBAIJ*) mat->data; 2443 baij->ht_fact = fact; 2444 PetscFunctionReturn(0); 2445 } 2446