#ifdef PETSC_RCS_HEADER
static char vcid[] = "$Id: mpibaij.c,v 1.163 1999/03/16 21:10:39 balay Exp balay $";
#endif

#include "src/mat/impls/baij/mpi/mpibaij.h"   /*I "mat.h" I*/
#include "src/vec/vecimpl.h"

/* Routines defined elsewhere (sequential BAIJ code and MPIBAIJ support files) */
extern int MatSetUpMultiply_MPIBAIJ(Mat);
extern int DisAssemble_MPIBAIJ(Mat);
extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int);
extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **);
extern int MatGetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *);
extern int MatSetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *,InsertMode);
extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
extern int MatPrintHelp_SeqBAIJ(Mat);

/*
    Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix.  This is done in a non-scalable way since the
  length of colmap equals the global matrix length (in block columns).
  The stored values are shifted by one so that 0 can mean "column not
  present on this process".
*/
#undef __FUNC__
#define __FUNC__ "CreateColmap_MPIBAIJ_Private"
static int CreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data;   /* off-diagonal part */
  int         nbs = B->nbs,i,bs=B->bs;             /* nbs = # local off-diag block cols */
#if defined (USE_CTABLE)
  int         ierr;
#endif

  PetscFunctionBegin;
#if defined (USE_CTABLE)
  /* hash-table variant: key = global block column + 1, value = local point column + 1.
     NOTE(review): initial table size baij->nbs/5 — presumably a load-factor heuristic;
     confirm baij->nbs is the intended field (local, not global, block count). */
  ierr = TableCreate(baij->nbs/5,&baij->colmap); CHKERRQ(ierr);
  for ( i=0; i<nbs; i++ ){
    ierr = TableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
  }
#else
  /* dense variant: colmap[global block col] = local point col + 1; 0 => not present */
  baij->colmap = (int *) PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap);
  PLogObjectMemory(mat,baij->Nbs*sizeof(int));
  PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));
  for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}

/* number of extra block slots added to a row when it must be enlarged */
#define CHUNKSIZE  10

/*
   Inserts a single point value (row,col are POINT indices local to this process)
   into the diagonal block A.  Expects the caller to have the macro scratch
   variables (rp,ap,rmax,nrow,brow,bcol,ridx,cidx,low,high,t,_i,ii,N,bap and the
   aliases aimax/ai/ailen/aj/aa) in scope, as MatSetValues_MPIBAIJ does.
   If the entry's block is absent and a->nonew permits, storage is reallocated
   and the block is inserted, shifting later blocks in the row up by one.
*/
#define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    /* binary search narrows [low,high); final few slots scanned linearly */ \
    low = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for ( _i=low; _i<high; _i++ ) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        /* block already exists: column-major placement within the bs*bs block */ \
        bap  = ap +  bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
      Scalar *new_a; \
 \
      if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
 \
      /* malloc new storage space: one allocation holds values, then j, then i */ \
      len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); \
      new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
      new_j   = (int *) (new_a + bs2*new_nz); \
      new_i   = new_j + new_nz; \
 \
      /* copy over old data into new slots; rows past brow are shifted by CHUNKSIZE */ \
      for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} \
      for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
      PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); \
      len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
      PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, \
                  len*sizeof(int)); \
      PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); \
      PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \
      PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
                  aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));  \
      /* free up old matrix storage */ \
      PetscFree(a->a);  \
      if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
      aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
      a->singlemalloc = 1; \
 \
      rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
      rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
      PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
      a->maxnz += bs2*CHUNKSIZE; \
      a->reallocs++; \
      /* NOTE(review): a->nz is bumped only on the realloc path, not for every
         newly inserted block below — verify nz is recomputed at assembly time */ \
      a->nz++; \
    } \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for ( ii=N; ii>=_i; ii-- ) { \
      rp[ii+1] = rp[ii]; \
      PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \
    } \
    if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
    a_noinsert:; \
    ailen[brow] = nrow; \
}

/*
   Identical to MatSetValues_SeqBAIJ_A_Private but operates on the off-diagonal
   block B (aliases bimax/bi/bilen/bj/ba).  Kept as a textual twin of the A
   macro; any fix applied to one must be mirrored in the other.
*/
#define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for ( _i=low; _i<high; _i++ ) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap  = ap +  bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int    new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
      Scalar *new_a; \
 \
      if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
 \
      /* malloc new storage space */ \
      len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(b->mbs+1)*sizeof(int); \
      new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
      new_j   = (int *) (new_a + bs2*new_nz); \
      new_i   = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = bi[ii];} \
      for ( ii=brow+1; ii<b->mbs+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
      PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int)); \
      len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
      PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow, \
                  len*sizeof(int)); \
      PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(Scalar)); \
      PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \
      PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
                  ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(Scalar));  \
      /* free up old matrix storage */ \
      PetscFree(b->a);  \
      if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \
      ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
      b->singlemalloc = 1; \
 \
      rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
      rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
      PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
      b->maxnz += bs2*CHUNKSIZE; \
      b->reallocs++; \
      b->nz++; \
    } \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for ( ii=N; ii>=_i; ii-- ) { \
      rp[ii+1] = rp[ii]; \
      PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \
    } \
    if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
    b_noinsert:; \
    bilen[brow] = nrow; \
}

/*
   MatSetValues for MPIBAIJ with POINT (not block) indices.
   Locally owned rows go directly into A (diagonal block) or B (off-diagonal
   block) via the macros above; rows owned by other processes are stashed and
   communicated at assembly time.  Negative row/column indices are ignored.
*/
#undef __FUNC__
#define __FUNC__ "MatSetValues_MPIBAIJ"
int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  Scalar      value;
  int         ierr,i,j,row,col;
  int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
  int         rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
  int         cend_orig=baij->cend_bs,bs=baij->bs;

  /* Some Variables required in the macro */
  Mat         A     = baij->A;
  Mat_SeqBAIJ *a    = (Mat_SeqBAIJ *) (A)->data;
  int         *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  Scalar      *aa=a->a;

  Mat         B     = baij->B;
  Mat_SeqBAIJ *b    = (Mat_SeqBAIJ *) (B)->data;
  int         *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  Scalar      *ba=b->a;

  int         *rp,ii,nrow,_i,rmax,N,brow,bcol;
  int         low,high,t,ridx,cidx,bs2=a->bs2;
  Scalar      *ap,*bap;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (im[i] < 0) continue;
#if defined(USE_PETSC_BOPT_g)
    if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;
      for ( j=0; j<n; j++ ) {
        if (in[j] >= cstart_orig && in[j] < cend_orig){
          /* column lies in the diagonal block A */
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(USE_PETSC_BOPT_g)
        else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Col too large");}
#endif
        else {
          /* column lies in the off-diagonal block B */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (USE_CTABLE)
            ierr = TableFind(baij->colmap, in[j]/bs + 1,&col); CHKERRQ(ierr);
            col  = col - 1 + in[j]%bs;
#else
            col = baij->colmap[in[j]/bs] - 1 + in[j]%bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              /* new off-process column: fall back to global numbering */
              ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
              col =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ *) (B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba=b->a;
            }
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      /* row owned by another process: stash for communication at assembly */
      if ( !baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

/*
   MatSetValuesBlocked for MPIBAIJ: im/in are BLOCK indices and v contains
   m*n blocks of bs*bs values each.  Non-contiguous blocks are first copied
   into the scratch buffer baij->barray (allocated lazily here).
   NOTE(review): this routine has no PetscFunctionBegin before the first
   statement, unlike its siblings — confirm whether that is intentional.
*/
#undef __FUNC__
#define __FUNC__ "MatSetValuesBlocked_MPIBAIJ"
int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  Scalar      *value,*barray=baij->barray;
  int         ierr,i,j,ii,jj,row,col;
  int         roworiented = baij->roworiented,rstart=baij->rstart ;
  int         rend=baij->rend,cstart=baij->cstart,stepval;
  int         cend=baij->cend,bs=baij->bs,bs2=baij->bs2;

  if(!barray) {
    baij->barray = barray = (Scalar*) PetscMalloc(bs2*sizeof(Scalar)); CHKPTRQ(barray);
  }

  /* stride between consecutive rows of one block inside v */
  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for ( i=0; i<m; i++ ) {
    if (im[i] < 0) continue;
#if defined(USE_PETSC_BOPT_g)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large, row %d max %d",im[i],baij->Mbs);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for ( j=0; j<n; j++ ) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = v + i*bs2;
        } else if((!roworiented) && (m == 1)) {
          barray = v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for ( ii=0; ii<bs; ii++,value+=stepval ) {
            for (jj=0; jj<bs; jj++ ) {
              *barray++  = *value++;
            }
          }
          barray -=bs2;   /* rewind to start of the copied block */
        }

        if (in[j] >= cstart && in[j] < cend){
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;
#if defined(USE_PETSC_BOPT_g)
        else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large, col %d max %d",in[j],baij->Nbs);}
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(USE_PETSC_BOPT_g)
            /* debug-only sanity check: colmap entries must be block aligned */
#if defined (USE_CTABLE)
            { int data;
              ierr = TableFind(baij->colmap,in[j]+1,&data); CHKERRQ(ierr);
              if((data - 1) % bs) {SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");}
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) {SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");}
#endif
#endif
#if defined (USE_CTABLE)
            ierr = TableFind(baij->colmap,in[j]+1,&col); CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
              col =  in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

/* multiplicative hashing: HASH maps key into [0,size); tmp must be a double lvalue */
#define HASH_KEY 0.6180339887
/* #define HASH1(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
/* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */

/*
   Hash-table variant of MatSetValues (point indices).  Only pre-existing
   nonzero locations may be set; the (block row,block col) key must already
   be present in the table built by MatCreateHashTable_MPIBAIJ_Private(),
   otherwise an error is raised.  Off-process rows are stashed as usual.
*/
#undef __FUNC__
#define __FUNC__ "MatSetValues_MPIBAIJ_HT"
int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr,i,j,row,col;
  int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
  int         rend_orig=baij->rend_bs,Nbs=baij->Nbs;
  int         h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx;
  double      tmp;
  Scalar      **HD = baij->hd,value;
#if defined(USE_PETSC_BOPT_g)
  int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  for ( i=0; i<m; i++ ) {
#if defined(USE_PETSC_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for ( j=0; j<n; j++ ) {
        col = in[j];
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        /* Look up into the Hash Table: key encodes the block (row,col), +1 so 0 = empty */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);


        idx = h1;
#if defined(USE_PETSC_BOPT_g)
        /* debug build additionally counts probes for the assembly-time statistics */
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          /* linear probe forward from h1, wrapping once to the front */
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#else
        if (HT[idx] != key) {
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#endif
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(USE_PETSC_BOPT_g)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

/*
   Hash-table variant of MatSetValuesBlocked (block indices, bs*bs blocks in v).
   Like the point version, every addressed block must already exist in the
   hash table; values are scattered into the stored block in column-major order.
*/
#undef __FUNC__
#define __FUNC__ "MatSetValuesBlocked_MPIBAIJ_HT"
int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr,i,j,ii,jj,row,col;
  int         roworiented = baij->roworiented,rstart=baij->rstart ;
  int         rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2;
  int         h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  double      tmp;
  Scalar      **HD = baij->hd,*value,*v_t,*baij_a;
#if defined(USE_PETSC_BOPT_g)
  int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for ( i=0; i<m; i++ ) {
#if defined(USE_PETSC_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    row   = im[i];
    v_t   = v + i*bs2;
    if (row >= rstart && row < rend) {
      for ( j=0; j<n; j++ ) {
        col = in[j];

        /* Look up into the Hash Table */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(USE_PETSC_BOPT_g)
        total_ct++;
        insert_ct++;
        if (HT[idx] != key) {
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#else
        if (HT[idx] != key) {
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#endif
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          if (addv == ADD_VALUES) {
            for ( ii=0; ii<bs; ii++,value+=stepval) {
              for ( jj=ii; jj<bs2; jj+=bs ) {
                baij_a[jj]  += *value++;
              }
            }
          } else {
            for ( ii=0; ii<bs; ii++,value+=stepval) {
              for ( jj=ii; jj<bs2; jj+=bs ) {
                baij_a[jj]  = *value++;
              }
            }
          }
        } else {
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) {
              for ( jj=0; jj<bs; jj++ ) {
                baij_a[jj]  += *value++;
              }
            }
          } else {
            for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) {
              for ( jj=0; jj<bs; jj++ ) {
                baij_a[jj]  = *value++;
              }
            }
          }
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(USE_PETSC_BOPT_g)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

/*
   MatGetValues for MPIBAIJ (point indices).  Only locally owned rows are
   supported; columns falling outside the local diagonal block are looked up
   through colmap, and absent entries read as 0.0.
*/
#undef __FUNC__
#define __FUNC__ "MatGetValues_MPIBAIJ"
int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs;
  int         bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col,data;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
        if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
        if (idxn[j] >= bscstart && idxn[j] < bscend){
          col = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (USE_CTABLE)
          ierr = TableFind(baij->colmap,idxn[j]/bs+1,&data); CHKERRQ(ierr);
          data --;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          /* entry absent from the off-diagonal block => value is zero */
          if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

/*
   MatNorm for MPIBAIJ.  Uniprocessor case delegates to the sequential norm;
   otherwise only the Frobenius norm is supported (sum of squares over A and B,
   reduced across all processes).
*/
#undef __FUNC__
#define __FUNC__ "MatNorm_MPIBAIJ"
int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data;
  int         ierr, i,bs2=baij->bs2;
  double      sum = 0.0;
  Scalar      *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr =  MatNorm(baij->A,type,norm); CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz*bs2; i++ ) {
#if defined(USE_PETSC_COMPLEX)
        sum += PetscReal(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz*bs2; i++ ) {
#if defined(USE_PETSC_COMPLEX)
        sum += PetscReal(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet");
    }
  }
  PetscFunctionReturn(0);
}


/*
  Creates the hash table, and sets the table
  This table is created only once.
  If new entries need to be added to the matrix
  then the hash table has to be destroyed and
  recreated.
671 */ 672 #undef __FUNC__ 673 #define __FUNC__ "MatCreateHashTable_MPIBAIJ_Private" 674 int MatCreateHashTable_MPIBAIJ_Private(Mat mat,double factor) 675 { 676 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 677 Mat A = baij->A, B=baij->B; 678 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data, *b=(Mat_SeqBAIJ *)B->data; 679 int i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 680 int size,bs2=baij->bs2,rstart=baij->rstart; 681 int cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs; 682 int *HT,key; 683 Scalar **HD; 684 double tmp; 685 #if defined(USE_PETSC_BOPT_g) 686 int ct=0,max=0; 687 #endif 688 689 PetscFunctionBegin; 690 baij->ht_size=(int)(factor*nz); 691 size = baij->ht_size; 692 693 if (baij->ht) { 694 PetscFunctionReturn(0); 695 } 696 697 /* Allocate Memory for Hash Table */ 698 baij->hd = (Scalar**)PetscMalloc((size)*(sizeof(int)+sizeof(Scalar*))+1); CHKPTRQ(baij->hd); 699 baij->ht = (int*)(baij->hd + size); 700 HD = baij->hd; 701 HT = baij->ht; 702 703 704 PetscMemzero(HD,size*(sizeof(int)+sizeof(Scalar*))); 705 706 707 /* Loop Over A */ 708 for ( i=0; i<a->mbs; i++ ) { 709 for ( j=ai[i]; j<ai[i+1]; j++ ) { 710 row = i+rstart; 711 col = aj[j]+cstart; 712 713 key = row*Nbs + col + 1; 714 h1 = HASH(size,key,tmp); 715 for ( k=0; k<size; k++ ){ 716 if (HT[(h1+k)%size] == 0.0) { 717 HT[(h1+k)%size] = key; 718 HD[(h1+k)%size] = a->a + j*bs2; 719 break; 720 #if defined(USE_PETSC_BOPT_g) 721 } else { 722 ct++; 723 #endif 724 } 725 } 726 #if defined(USE_PETSC_BOPT_g) 727 if (k> max) max = k; 728 #endif 729 } 730 } 731 /* Loop Over B */ 732 for ( i=0; i<b->mbs; i++ ) { 733 for ( j=bi[i]; j<bi[i+1]; j++ ) { 734 row = i+rstart; 735 col = garray[bj[j]]; 736 key = row*Nbs + col + 1; 737 h1 = HASH(size,key,tmp); 738 for ( k=0; k<size; k++ ){ 739 if (HT[(h1+k)%size] == 0.0) { 740 HT[(h1+k)%size] = key; 741 HD[(h1+k)%size] = b->a + j*bs2; 742 break; 743 #if defined(USE_PETSC_BOPT_g) 744 } else { 745 ct++; 746 #endif 747 } 748 } 749 #if 
defined(USE_PETSC_BOPT_g) 750 if (k> max) max = k; 751 #endif 752 } 753 } 754 755 /* Print Summary */ 756 #if defined(USE_PETSC_BOPT_g) 757 for ( i=0,j=0; i<size; i++) 758 if (HT[i]) {j++;} 759 PLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n", 760 (j== 0)? 0.0:((double)(ct+j))/j,max); 761 #endif 762 PetscFunctionReturn(0); 763 } 764 765 #undef __FUNC__ 766 #define __FUNC__ "MatAssemblyBegin_MPIBAIJ" 767 int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 768 { 769 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 770 int ierr,nstash,reallocs; 771 InsertMode addv; 772 773 PetscFunctionBegin; 774 if (baij->donotstash) { 775 PetscFunctionReturn(0); 776 } 777 778 /* make sure all processors are either in INSERTMODE or ADDMODE */ 779 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 780 if (addv == (ADD_VALUES|INSERT_VALUES)) { 781 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added"); 782 } 783 mat->insertmode = addv; /* in case this processor had no cache */ 784 785 ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs); CHKERRQ(ierr); 786 ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners); CHKERRQ(ierr); 787 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs); CHKERRQ(ierr); 788 PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Stash has %d entries, uses %d mallocs.\n", 789 nstash,reallocs); 790 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs); CHKERRQ(ierr); 791 PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %d entries, uses %d mallocs.\n", 792 nstash,reallocs); 793 PetscFunctionReturn(0); 794 } 795 796 #undef __FUNC__ 797 #define __FUNC__ "MatAssemblyEnd_MPIBAIJ" 798 int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 799 { 800 Mat_MPIBAIJ *baij=(Mat_MPIBAIJ *) mat->data; 801 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data; 802 int i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2; 803 int 
*row,*col,other_disassembled,r1,r2,r3; 804 Scalar *val; 805 InsertMode addv = mat->insertmode; 806 807 PetscFunctionBegin; 808 if (!baij->donotstash) { 809 while (1) { 810 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg); CHKERRQ(ierr); 811 if (!flg) break; 812 813 for ( i=0; i<n; ) { 814 /* Now identify the consecutive vals belonging to the same row */ 815 for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; } 816 if (j < n) ncols = j-i; 817 else ncols = n-i; 818 /* Now assemble all these values with a single function call */ 819 ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv); CHKERRQ(ierr); 820 i = j; 821 } 822 } 823 ierr = MatStashScatterEnd_Private(&mat->stash); CHKERRQ(ierr); 824 /* Now process the block-stash. Since the values are stashed column-oriented, 825 set the roworiented flag to column oriented, and after MatSetValues() 826 restore the original flags */ 827 r1 = baij->roworiented; 828 r2 = a->roworiented; 829 r3 = b->roworiented; 830 baij->roworiented = 0; 831 a->roworiented = 0; 832 b->roworiented = 0; 833 while (1) { 834 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg); CHKERRQ(ierr); 835 if (!flg) break; 836 837 for ( i=0; i<n; ) { 838 /* Now identify the consecutive vals belonging to the same row */ 839 for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; } 840 if (j < n) ncols = j-i; 841 else ncols = n-i; 842 ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv); CHKERRQ(ierr); 843 i = j; 844 } 845 } 846 ierr = MatStashScatterEnd_Private(&mat->bstash); CHKERRQ(ierr); 847 baij->roworiented = r1; 848 a->roworiented = r2; 849 b->roworiented = r3; 850 } 851 852 ierr = MatAssemblyBegin(baij->A,mode); CHKERRQ(ierr); 853 ierr = MatAssemblyEnd(baij->A,mode); CHKERRQ(ierr); 854 855 /* determine if any processor has disassembled, if so we must 856 also disassemble ourselfs, in order that we may reassemble. 
*/ 857 /* 858 if nonzero structure of submatrix B cannot change then we know that 859 no processor disassembled thus we can skip this stuff 860 */ 861 if (!((Mat_SeqBAIJ*) baij->B->data)->nonew) { 862 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 863 if (mat->was_assembled && !other_disassembled) { 864 ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 865 } 866 } 867 868 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 869 ierr = MatSetUpMultiply_MPIBAIJ(mat); CHKERRQ(ierr); 870 } 871 ierr = MatAssemblyBegin(baij->B,mode); CHKERRQ(ierr); 872 ierr = MatAssemblyEnd(baij->B,mode); CHKERRQ(ierr); 873 874 #if defined(USE_PETSC_BOPT_g) 875 if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 876 PLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n", 877 ((double)baij->ht_total_ct)/baij->ht_insert_ct); 878 baij->ht_total_ct = 0; 879 baij->ht_insert_ct = 0; 880 } 881 #endif 882 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 883 ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact); CHKERRQ(ierr); 884 mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 885 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 886 } 887 888 if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;} 889 PetscFunctionReturn(0); 890 } 891 892 #undef __FUNC__ 893 #define __FUNC__ "MatView_MPIBAIJ_Binary" 894 static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer) 895 { 896 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 897 int ierr; 898 899 PetscFunctionBegin; 900 if (baij->size == 1) { 901 ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 902 } else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported"); 903 PetscFunctionReturn(0); 904 } 905 906 #undef __FUNC__ 907 #define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworSocket" 908 static int MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer) 909 { 910 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 911 int ierr, 
format,bs = baij->bs, size = baij->size, rank = baij->rank; 912 FILE *fd; 913 ViewerType vtype; 914 915 PetscFunctionBegin; 916 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 917 if (PetscTypeCompare(vtype,ASCII_VIEWER)) { 918 ierr = ViewerGetFormat(viewer,&format); CHKERRQ(ierr); 919 if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 920 MatInfo info; 921 MPI_Comm_rank(mat->comm,&rank); 922 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 923 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 924 PetscSequentialPhaseBegin(mat->comm,1); 925 fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n", 926 rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs, 927 baij->bs,(int)info.memory); 928 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info); 929 fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs); 930 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info); 931 fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs); 932 fflush(fd); 933 PetscSequentialPhaseEnd(mat->comm,1); 934 ierr = VecScatterView(baij->Mvctx,viewer); CHKERRQ(ierr); 935 PetscFunctionReturn(0); 936 } else if (format == VIEWER_FORMAT_ASCII_INFO) { 937 PetscPrintf(mat->comm," block size is %d\n",bs); 938 PetscFunctionReturn(0); 939 } 940 } 941 942 if (PetscTypeCompare(vtype,DRAW_VIEWER)) { 943 Draw draw; 944 PetscTruth isnull; 945 ierr = ViewerDrawGetDraw(viewer,0,&draw); CHKERRQ(ierr); 946 ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 947 } 948 949 if (size == 1) { 950 ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 951 } else { 952 /* assemble the entire matrix onto first processor. 
*/ 953 Mat A; 954 Mat_SeqBAIJ *Aloc; 955 int M = baij->M, N = baij->N,*ai,*aj,col,i,j,k,*rvals; 956 int mbs = baij->mbs; 957 Scalar *a; 958 959 if (!rank) { 960 ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 961 } else { 962 ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 963 } 964 PLogObjectParent(mat,A); 965 966 /* copy over the A part */ 967 Aloc = (Mat_SeqBAIJ*) baij->A->data; 968 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 969 rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals); 970 971 for ( i=0; i<mbs; i++ ) { 972 rvals[0] = bs*(baij->rstart + i); 973 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 974 for ( j=ai[i]; j<ai[i+1]; j++ ) { 975 col = (baij->cstart+aj[j])*bs; 976 for (k=0; k<bs; k++ ) { 977 ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 978 col++; a += bs; 979 } 980 } 981 } 982 /* copy over the B part */ 983 Aloc = (Mat_SeqBAIJ*) baij->B->data; 984 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 985 for ( i=0; i<mbs; i++ ) { 986 rvals[0] = bs*(baij->rstart + i); 987 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 988 for ( j=ai[i]; j<ai[i+1]; j++ ) { 989 col = baij->garray[aj[j]]*bs; 990 for (k=0; k<bs; k++ ) { 991 ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 992 col++; a += bs; 993 } 994 } 995 } 996 PetscFree(rvals); 997 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 998 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 999 /* 1000 Everyone has to call to draw the matrix since the graphics waits are 1001 synchronized across all processors that share the Draw object 1002 */ 1003 if (!rank || PetscTypeCompare(vtype,DRAW_VIEWER)) { 1004 ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 1005 } 1006 ierr = MatDestroy(A); CHKERRQ(ierr); 1007 } 1008 PetscFunctionReturn(0); 1009 } 1010 1011 1012 1013 #undef __FUNC__ 1014 #define __FUNC__ "MatView_MPIBAIJ" 1015 
int MatView_MPIBAIJ(Mat mat,Viewer viewer) 1016 { 1017 int ierr; 1018 ViewerType vtype; 1019 1020 PetscFunctionBegin; 1021 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 1022 if (PetscTypeCompare(vtype,ASCII_VIEWER) || PetscTypeCompare(vtype,DRAW_VIEWER) || 1023 PetscTypeCompare(vtype,SOCKET_VIEWER)) { 1024 ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer); CHKERRQ(ierr); 1025 } else if (PetscTypeCompare(vtype,BINARY_VIEWER)) { 1026 ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1027 } else { 1028 SETERRQ(1,1,"Viewer type not supported by PETSc object"); 1029 } 1030 PetscFunctionReturn(0); 1031 } 1032 1033 #undef __FUNC__ 1034 #define __FUNC__ "MatDestroy_MPIBAIJ" 1035 int MatDestroy_MPIBAIJ(Mat mat) 1036 { 1037 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1038 int ierr; 1039 1040 PetscFunctionBegin; 1041 if (--mat->refct > 0) PetscFunctionReturn(0); 1042 1043 if (mat->mapping) { 1044 ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr); 1045 } 1046 if (mat->bmapping) { 1047 ierr = ISLocalToGlobalMappingDestroy(mat->bmapping); CHKERRQ(ierr); 1048 } 1049 if (mat->rmap) { 1050 ierr = MapDestroy(mat->rmap);CHKERRQ(ierr); 1051 } 1052 if (mat->cmap) { 1053 ierr = MapDestroy(mat->cmap);CHKERRQ(ierr); 1054 } 1055 #if defined(USE_PETSC_LOG) 1056 PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",baij->M,baij->N); 1057 #endif 1058 1059 ierr = MatStashDestroy_Private(&mat->stash); CHKERRQ(ierr); 1060 ierr = MatStashDestroy_Private(&mat->bstash); CHKERRQ(ierr); 1061 1062 PetscFree(baij->rowners); 1063 ierr = MatDestroy(baij->A); CHKERRQ(ierr); 1064 ierr = MatDestroy(baij->B); CHKERRQ(ierr); 1065 #if defined (USE_CTABLE) 1066 if (baij->colmap) TableDelete(baij->colmap); 1067 #else 1068 if (baij->colmap) PetscFree(baij->colmap); 1069 #endif 1070 if (baij->garray) PetscFree(baij->garray); 1071 if (baij->lvec) VecDestroy(baij->lvec); 1072 if (baij->Mvctx) VecScatterDestroy(baij->Mvctx); 1073 if (baij->rowvalues) PetscFree(baij->rowvalues); 1074 
if (baij->barray) PetscFree(baij->barray); 1075 if (baij->hd) PetscFree(baij->hd); 1076 PetscFree(baij); 1077 PLogObjectDestroy(mat); 1078 PetscHeaderDestroy(mat); 1079 PetscFunctionReturn(0); 1080 } 1081 1082 #undef __FUNC__ 1083 #define __FUNC__ "MatMult_MPIBAIJ" 1084 int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1085 { 1086 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1087 int ierr, nt; 1088 1089 PetscFunctionBegin; 1090 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1091 if (nt != a->n) { 1092 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx"); 1093 } 1094 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1095 if (nt != a->m) { 1096 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible parition of A and yy"); 1097 } 1098 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1099 ierr = (*a->A->ops->mult)(a->A,xx,yy); CHKERRQ(ierr); 1100 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1101 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 1102 ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1103 PetscFunctionReturn(0); 1104 } 1105 1106 #undef __FUNC__ 1107 #define __FUNC__ "MatMultAdd_MPIBAIJ" 1108 int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1109 { 1110 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1111 int ierr; 1112 1113 PetscFunctionBegin; 1114 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1115 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 1116 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1117 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 1118 PetscFunctionReturn(0); 1119 } 1120 1121 #undef __FUNC__ 1122 #define __FUNC__ "MatMultTrans_MPIBAIJ" 1123 int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy) 1124 { 1125 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1126 int ierr; 1127 1128 PetscFunctionBegin; 1129 /* do nondiagonal part 
*/ 1130 ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 1131 /* send it on its way */ 1132 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1133 /* do local part */ 1134 ierr = (*a->A->ops->multtrans)(a->A,xx,yy); CHKERRQ(ierr); 1135 /* receive remote parts: note this assumes the values are not actually */ 1136 /* inserted in yy until the next line, which is true for my implementation*/ 1137 /* but is not perhaps always true. */ 1138 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1139 PetscFunctionReturn(0); 1140 } 1141 1142 #undef __FUNC__ 1143 #define __FUNC__ "MatMultTransAdd_MPIBAIJ" 1144 int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1145 { 1146 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1147 int ierr; 1148 1149 PetscFunctionBegin; 1150 /* do nondiagonal part */ 1151 ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 1152 /* send it on its way */ 1153 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 1154 /* do local part */ 1155 ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 1156 /* receive remote parts: note this assumes the values are not actually */ 1157 /* inserted in yy until the next line, which is true for my implementation*/ 1158 /* but is not perhaps always true. 
*/ 1159 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 1160 PetscFunctionReturn(0); 1161 } 1162 1163 /* 1164 This only works correctly for square matrices where the subblock A->A is the 1165 diagonal block 1166 */ 1167 #undef __FUNC__ 1168 #define __FUNC__ "MatGetDiagonal_MPIBAIJ" 1169 int MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1170 { 1171 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1172 int ierr; 1173 1174 PetscFunctionBegin; 1175 if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block"); 1176 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1177 PetscFunctionReturn(0); 1178 } 1179 1180 #undef __FUNC__ 1181 #define __FUNC__ "MatScale_MPIBAIJ" 1182 int MatScale_MPIBAIJ(Scalar *aa,Mat A) 1183 { 1184 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1185 int ierr; 1186 1187 PetscFunctionBegin; 1188 ierr = MatScale(aa,a->A); CHKERRQ(ierr); 1189 ierr = MatScale(aa,a->B); CHKERRQ(ierr); 1190 PetscFunctionReturn(0); 1191 } 1192 1193 #undef __FUNC__ 1194 #define __FUNC__ "MatGetSize_MPIBAIJ" 1195 int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n) 1196 { 1197 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1198 1199 PetscFunctionBegin; 1200 if (m) *m = mat->M; 1201 if (n) *n = mat->N; 1202 PetscFunctionReturn(0); 1203 } 1204 1205 #undef __FUNC__ 1206 #define __FUNC__ "MatGetLocalSize_MPIBAIJ" 1207 int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n) 1208 { 1209 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1210 1211 PetscFunctionBegin; 1212 *m = mat->m; *n = mat->n; 1213 PetscFunctionReturn(0); 1214 } 1215 1216 #undef __FUNC__ 1217 #define __FUNC__ "MatGetOwnershipRange_MPIBAIJ" 1218 int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n) 1219 { 1220 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1221 1222 PetscFunctionBegin; 1223 *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs; 1224 PetscFunctionReturn(0); 1225 } 1226 1227 #undef __FUNC__ 1228 #define __FUNC__ "MatGetRow_MPIBAIJ" 1229 int 
MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 1230 { 1231 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1232 Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 1233 int bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB; 1234 int nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs; 1235 int *cmap, *idx_p,cstart = mat->cstart; 1236 1237 PetscFunctionBegin; 1238 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active"); 1239 mat->getrowactive = PETSC_TRUE; 1240 1241 if (!mat->rowvalues && (idx || v)) { 1242 /* 1243 allocate enough space to hold information from the longest row. 1244 */ 1245 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data; 1246 int max = 1,mbs = mat->mbs,tmp; 1247 for ( i=0; i<mbs; i++ ) { 1248 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1249 if (max < tmp) { max = tmp; } 1250 } 1251 mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar))); 1252 CHKPTRQ(mat->rowvalues); 1253 mat->rowindices = (int *) (mat->rowvalues + max*bs2); 1254 } 1255 1256 if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,0,"Only local rows") 1257 lrow = row - brstart; 1258 1259 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1260 if (!v) {pvA = 0; pvB = 0;} 1261 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1262 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1263 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1264 nztot = nzA + nzB; 1265 1266 cmap = mat->garray; 1267 if (v || idx) { 1268 if (nztot) { 1269 /* Sort by increasing column numbers, assuming A and B already sorted */ 1270 int imark = -1; 1271 if (v) { 1272 *v = v_p = mat->rowvalues; 1273 for ( i=0; i<nzB; i++ ) { 1274 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1275 else break; 1276 } 1277 imark = i; 1278 for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i]; 1279 for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = 
vworkB[i]; 1280 } 1281 if (idx) { 1282 *idx = idx_p = mat->rowindices; 1283 if (imark > -1) { 1284 for ( i=0; i<imark; i++ ) { 1285 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1286 } 1287 } else { 1288 for ( i=0; i<nzB; i++ ) { 1289 if (cmap[cworkB[i]/bs] < cstart) 1290 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1291 else break; 1292 } 1293 imark = i; 1294 } 1295 for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart*bs + cworkA[i]; 1296 for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1297 } 1298 } else { 1299 if (idx) *idx = 0; 1300 if (v) *v = 0; 1301 } 1302 } 1303 *nz = nztot; 1304 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1305 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1306 PetscFunctionReturn(0); 1307 } 1308 1309 #undef __FUNC__ 1310 #define __FUNC__ "MatRestoreRow_MPIBAIJ" 1311 int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1312 { 1313 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1314 1315 PetscFunctionBegin; 1316 if (baij->getrowactive == PETSC_FALSE) { 1317 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called"); 1318 } 1319 baij->getrowactive = PETSC_FALSE; 1320 PetscFunctionReturn(0); 1321 } 1322 1323 #undef __FUNC__ 1324 #define __FUNC__ "MatGetBlockSize_MPIBAIJ" 1325 int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs) 1326 { 1327 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1328 1329 PetscFunctionBegin; 1330 *bs = baij->bs; 1331 PetscFunctionReturn(0); 1332 } 1333 1334 #undef __FUNC__ 1335 #define __FUNC__ "MatZeroEntries_MPIBAIJ" 1336 int MatZeroEntries_MPIBAIJ(Mat A) 1337 { 1338 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 1339 int ierr; 1340 1341 PetscFunctionBegin; 1342 ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 1343 ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 1344 PetscFunctionReturn(0); 1345 } 1346 1347 #undef __FUNC__ 1348 #define __FUNC__ "MatGetInfo_MPIBAIJ" 1349 int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType 
flag,MatInfo *info) 1350 { 1351 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) matin->data; 1352 Mat A = a->A, B = a->B; 1353 int ierr; 1354 double isend[5], irecv[5]; 1355 1356 PetscFunctionBegin; 1357 info->block_size = (double)a->bs; 1358 ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr); 1359 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1360 isend[3] = info->memory; isend[4] = info->mallocs; 1361 ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr); 1362 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1363 isend[3] += info->memory; isend[4] += info->mallocs; 1364 if (flag == MAT_LOCAL) { 1365 info->nz_used = isend[0]; 1366 info->nz_allocated = isend[1]; 1367 info->nz_unneeded = isend[2]; 1368 info->memory = isend[3]; 1369 info->mallocs = isend[4]; 1370 } else if (flag == MAT_GLOBAL_MAX) { 1371 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr); 1372 info->nz_used = irecv[0]; 1373 info->nz_allocated = irecv[1]; 1374 info->nz_unneeded = irecv[2]; 1375 info->memory = irecv[3]; 1376 info->mallocs = irecv[4]; 1377 } else if (flag == MAT_GLOBAL_SUM) { 1378 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr); 1379 info->nz_used = irecv[0]; 1380 info->nz_allocated = irecv[1]; 1381 info->nz_unneeded = irecv[2]; 1382 info->memory = irecv[3]; 1383 info->mallocs = irecv[4]; 1384 } else { 1385 SETERRQ1(1,1,"Unknown MatInfoType argument %d",flag); 1386 } 1387 info->rows_global = (double)a->M; 1388 info->columns_global = (double)a->N; 1389 info->rows_local = (double)a->m; 1390 info->columns_local = (double)a->N; 1391 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1392 info->fill_ratio_needed = 0; 1393 info->factor_mallocs = 0; 1394 PetscFunctionReturn(0); 1395 } 1396 1397 #undef __FUNC__ 1398 #define __FUNC__ "MatSetOption_MPIBAIJ" 1399 int MatSetOption_MPIBAIJ(Mat A,MatOption op) 1400 { 1401 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 
1402 int ierr; 1403 1404 PetscFunctionBegin; 1405 if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 1406 op == MAT_YES_NEW_NONZERO_LOCATIONS || 1407 op == MAT_COLUMNS_UNSORTED || 1408 op == MAT_COLUMNS_SORTED || 1409 op == MAT_NEW_NONZERO_ALLOCATION_ERR || 1410 op == MAT_NEW_NONZERO_LOCATION_ERR) { 1411 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1412 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1413 } else if (op == MAT_ROW_ORIENTED) { 1414 a->roworiented = 1; 1415 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1416 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1417 } else if (op == MAT_ROWS_SORTED || 1418 op == MAT_ROWS_UNSORTED || 1419 op == MAT_SYMMETRIC || 1420 op == MAT_STRUCTURALLY_SYMMETRIC || 1421 op == MAT_YES_NEW_DIAGONALS || 1422 op == MAT_USE_HASH_TABLE) { 1423 PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 1424 } else if (op == MAT_COLUMN_ORIENTED) { 1425 a->roworiented = 0; 1426 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1427 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1428 } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 1429 a->donotstash = 1; 1430 } else if (op == MAT_NO_NEW_DIAGONALS) { 1431 SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 1432 } else if (op == MAT_USE_HASH_TABLE) { 1433 a->ht_flag = 1; 1434 } else { 1435 SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 1436 } 1437 PetscFunctionReturn(0); 1438 } 1439 1440 #undef __FUNC__ 1441 #define __FUNC__ "MatTranspose_MPIBAIJ(" 1442 int MatTranspose_MPIBAIJ(Mat A,Mat *matout) 1443 { 1444 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data; 1445 Mat_SeqBAIJ *Aloc; 1446 Mat B; 1447 int ierr,M=baij->M,N=baij->N,*ai,*aj,i,*rvals,j,k,col; 1448 int bs=baij->bs,mbs=baij->mbs; 1449 Scalar *a; 1450 1451 PetscFunctionBegin; 1452 if (matout == PETSC_NULL && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place"); 1453 ierr = MatCreateMPIBAIJ(A->comm,baij->bs,baij->n,baij->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B); 1454 CHKERRQ(ierr); 1455 1456 /* copy over the A part */ 1457 Aloc = (Mat_SeqBAIJ*) 
baij->A->data; 1458 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1459 rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals); 1460 1461 for ( i=0; i<mbs; i++ ) { 1462 rvals[0] = bs*(baij->rstart + i); 1463 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 1464 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1465 col = (baij->cstart+aj[j])*bs; 1466 for (k=0; k<bs; k++ ) { 1467 ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1468 col++; a += bs; 1469 } 1470 } 1471 } 1472 /* copy over the B part */ 1473 Aloc = (Mat_SeqBAIJ*) baij->B->data; 1474 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1475 for ( i=0; i<mbs; i++ ) { 1476 rvals[0] = bs*(baij->rstart + i); 1477 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 1478 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1479 col = baij->garray[aj[j]]*bs; 1480 for (k=0; k<bs; k++ ) { 1481 ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1482 col++; a += bs; 1483 } 1484 } 1485 } 1486 PetscFree(rvals); 1487 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1488 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1489 1490 if (matout != PETSC_NULL) { 1491 *matout = B; 1492 } else { 1493 PetscOps *Abops; 1494 MatOps Aops; 1495 1496 /* This isn't really an in-place transpose .... but free data structures from baij */ 1497 PetscFree(baij->rowners); 1498 ierr = MatDestroy(baij->A); CHKERRQ(ierr); 1499 ierr = MatDestroy(baij->B); CHKERRQ(ierr); 1500 #if defined (USE_CTABLE) 1501 if (baij->colmap) TableDelete(baij->colmap); 1502 #else 1503 if (baij->colmap) PetscFree(baij->colmap); 1504 #endif 1505 if (baij->garray) PetscFree(baij->garray); 1506 if (baij->lvec) VecDestroy(baij->lvec); 1507 if (baij->Mvctx) VecScatterDestroy(baij->Mvctx); 1508 PetscFree(baij); 1509 1510 /* 1511 This is horrible, horrible code. We need to keep the 1512 A pointers for the bops and ops but copy everything 1513 else from C. 
1514 */ 1515 Abops = A->bops; 1516 Aops = A->ops; 1517 PetscMemcpy(A,B,sizeof(struct _p_Mat)); 1518 A->bops = Abops; 1519 A->ops = Aops; 1520 1521 PetscHeaderDestroy(B); 1522 } 1523 PetscFunctionReturn(0); 1524 } 1525 1526 #undef __FUNC__ 1527 #define __FUNC__ "MatDiagonalScale_MPIBAIJ" 1528 int MatDiagonalScale_MPIBAIJ(Mat A,Vec ll,Vec rr) 1529 { 1530 Mat a = ((Mat_MPIBAIJ *) A->data)->A; 1531 Mat b = ((Mat_MPIBAIJ *) A->data)->B; 1532 int ierr,s1,s2,s3; 1533 1534 PetscFunctionBegin; 1535 if (ll) { 1536 ierr = VecGetLocalSize(ll,&s1); CHKERRQ(ierr); 1537 ierr = MatGetLocalSize(A,&s2,&s3); CHKERRQ(ierr); 1538 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"non-conforming local sizes"); 1539 ierr = MatDiagonalScale(a,ll,0); CHKERRQ(ierr); 1540 ierr = MatDiagonalScale(b,ll,0); CHKERRQ(ierr); 1541 } 1542 if (rr) SETERRQ(PETSC_ERR_SUP,0,"not supported for right vector"); 1543 PetscFunctionReturn(0); 1544 } 1545 1546 #undef __FUNC__ 1547 #define __FUNC__ "MatZeroRows_MPIBAIJ" 1548 int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag) 1549 { 1550 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 1551 int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 1552 int *procs,*nprocs,j,found,idx,nsends,*work,row; 1553 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 1554 int *rvalues,tag = A->tag,count,base,slen,n,*source; 1555 int *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs; 1556 MPI_Comm comm = A->comm; 1557 MPI_Request *send_waits,*recv_waits; 1558 MPI_Status recv_status,*send_status; 1559 IS istmp; 1560 1561 PetscFunctionBegin; 1562 ierr = ISGetSize(is,&N); CHKERRQ(ierr); 1563 ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 1564 1565 /* first count number of contributors to each processor */ 1566 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 1567 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 1568 owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 1569 for ( i=0; i<N; i++ ) { 1570 idx = rows[i]; 1571 
found = 0; 1572 for ( j=0; j<size; j++ ) { 1573 if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 1574 nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 1575 } 1576 } 1577 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range"); 1578 } 1579 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 1580 1581 /* inform other processors of number of messages and max length*/ 1582 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 1583 ierr = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 1584 nrecvs = work[rank]; 1585 ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 1586 nmax = work[rank]; 1587 PetscFree(work); 1588 1589 /* post receives: */ 1590 rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); CHKPTRQ(rvalues); 1591 recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 1592 for ( i=0; i<nrecvs; i++ ) { 1593 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 1594 } 1595 1596 /* do sends: 1597 1) starts[i] gives the starting index in svalues for stuff going to 1598 the ith processor 1599 */ 1600 svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 1601 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 1602 starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 1603 starts[0] = 0; 1604 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 1605 for ( i=0; i<N; i++ ) { 1606 svalues[starts[owner[i]]++] = rows[i]; 1607 } 1608 ISRestoreIndices(is,&rows); 1609 1610 starts[0] = 0; 1611 for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 1612 count = 0; 1613 for ( i=0; i<size; i++ ) { 1614 if (procs[i]) { 1615 ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1616 } 1617 } 1618 PetscFree(starts); 1619 1620 base = owners[rank]*bs; 1621 
1622 /* wait on receives */ 1623 lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 1624 source = lens + nrecvs; 1625 count = nrecvs; slen = 0; 1626 while (count) { 1627 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1628 /* unpack receives into our local space */ 1629 ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 1630 source[imdex] = recv_status.MPI_SOURCE; 1631 lens[imdex] = n; 1632 slen += n; 1633 count--; 1634 } 1635 PetscFree(recv_waits); 1636 1637 /* move the data into the send scatter */ 1638 lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 1639 count = 0; 1640 for ( i=0; i<nrecvs; i++ ) { 1641 values = rvalues + i*nmax; 1642 for ( j=0; j<lens[i]; j++ ) { 1643 lrows[count++] = values[j] - base; 1644 } 1645 } 1646 PetscFree(rvalues); PetscFree(lens); 1647 PetscFree(owner); PetscFree(nprocs); 1648 1649 /* actually zap the local rows */ 1650 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 1651 PLogObjectParent(A,istmp); 1652 1653 /* 1654 Zero the required rows. If the "diagonal block" of the matrix 1655 is square and the user wishes to set the diagonal we use seperate 1656 code so that MatSetValues() is not called for each diagonal allocating 1657 new memory, thus calling lots of mallocs and slowing things down. 
1658 1659 Contributed by: Mathew Knepley 1660 */ 1661 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1662 ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 1663 if (diag && (l->A->M == l->A->N)) { 1664 ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 1665 } else if (diag) { 1666 ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr); 1667 if (((Mat_SeqBAIJ*)l->A->data)->nonew) { 1668 SETERRQ(PETSC_ERR_SUP,0,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1669 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1670 } 1671 for ( i = 0; i < slen; i++ ) { 1672 row = lrows[i] + rstart_bs; 1673 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 1674 } 1675 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1676 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1677 } else { 1678 ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr); 1679 } 1680 1681 ierr = ISDestroy(istmp); CHKERRQ(ierr); 1682 PetscFree(lrows); 1683 1684 /* wait on sends */ 1685 if (nsends) { 1686 send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 1687 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1688 PetscFree(send_status); 1689 } 1690 PetscFree(send_waits); PetscFree(svalues); 1691 1692 PetscFunctionReturn(0); 1693 } 1694 1695 #undef __FUNC__ 1696 #define __FUNC__ "MatPrintHelp_MPIBAIJ" 1697 int MatPrintHelp_MPIBAIJ(Mat A) 1698 { 1699 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 1700 MPI_Comm comm = A->comm; 1701 static int called = 0; 1702 int ierr; 1703 1704 PetscFunctionBegin; 1705 if (!a->rank) { 1706 ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr); 1707 } 1708 if (called) {PetscFunctionReturn(0);} else called = 1; 1709 (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n"); 1710 (*PetscHelpPrintf)(comm," -mat_use_hash_table <factor>: Use hashtable for efficient matrix 
assembly\n"); 1711 PetscFunctionReturn(0); 1712 } 1713 1714 #undef __FUNC__ 1715 #define __FUNC__ "MatSetUnfactored_MPIBAIJ" 1716 int MatSetUnfactored_MPIBAIJ(Mat A) 1717 { 1718 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 1719 int ierr; 1720 1721 PetscFunctionBegin; 1722 ierr = MatSetUnfactored(a->A); CHKERRQ(ierr); 1723 PetscFunctionReturn(0); 1724 } 1725 1726 static int MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *); 1727 1728 /* -------------------------------------------------------------------*/ 1729 static struct _MatOps MatOps_Values = { 1730 MatSetValues_MPIBAIJ, 1731 MatGetRow_MPIBAIJ, 1732 MatRestoreRow_MPIBAIJ, 1733 MatMult_MPIBAIJ, 1734 MatMultAdd_MPIBAIJ, 1735 MatMultTrans_MPIBAIJ, 1736 MatMultTransAdd_MPIBAIJ, 1737 0, 1738 0, 1739 0, 1740 0, 1741 0, 1742 0, 1743 0, 1744 MatTranspose_MPIBAIJ, 1745 MatGetInfo_MPIBAIJ, 1746 0, 1747 MatGetDiagonal_MPIBAIJ, 1748 MatDiagonalScale_MPIBAIJ, 1749 MatNorm_MPIBAIJ, 1750 MatAssemblyBegin_MPIBAIJ, 1751 MatAssemblyEnd_MPIBAIJ, 1752 0, 1753 MatSetOption_MPIBAIJ, 1754 MatZeroEntries_MPIBAIJ, 1755 MatZeroRows_MPIBAIJ, 1756 0, 1757 0, 1758 0, 1759 0, 1760 MatGetSize_MPIBAIJ, 1761 MatGetLocalSize_MPIBAIJ, 1762 MatGetOwnershipRange_MPIBAIJ, 1763 0, 1764 0, 1765 0, 1766 0, 1767 MatDuplicate_MPIBAIJ, 1768 0, 1769 0, 1770 0, 1771 0, 1772 0, 1773 MatGetSubMatrices_MPIBAIJ, 1774 MatIncreaseOverlap_MPIBAIJ, 1775 MatGetValues_MPIBAIJ, 1776 0, 1777 MatPrintHelp_MPIBAIJ, 1778 MatScale_MPIBAIJ, 1779 0, 1780 0, 1781 0, 1782 MatGetBlockSize_MPIBAIJ, 1783 0, 1784 0, 1785 0, 1786 0, 1787 0, 1788 0, 1789 MatSetUnfactored_MPIBAIJ, 1790 0, 1791 MatSetValuesBlocked_MPIBAIJ, 1792 0, 1793 0, 1794 0, 1795 MatGetMaps_Petsc}; 1796 1797 1798 EXTERN_C_BEGIN 1799 #undef __FUNC__ 1800 #define __FUNC__ "MatGetDiagonalBlock_MPIBAIJ" 1801 int MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 1802 { 1803 PetscFunctionBegin; 1804 *a = ((Mat_MPIBAIJ *)A->data)->A; 1805 *iscopy = PETSC_FALSE; 1806 PetscFunctionReturn(0); 1807 } 
1808 EXTERN_C_END 1809 1810 #undef __FUNC__ 1811 #define __FUNC__ "MatCreateMPIBAIJ" 1812 /*@C 1813 MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 1814 (block compressed row). For good matrix assembly performance 1815 the user should preallocate the matrix storage by setting the parameters 1816 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1817 performance can be increased by more than a factor of 50. 1818 1819 Collective on MPI_Comm 1820 1821 Input Parameters: 1822 + comm - MPI communicator 1823 . bs - size of block 1824 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1825 This value should be the same as the local size used in creating the 1826 y vector for the matrix-vector product y = Ax. 1827 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1828 This value should be the same as the local size used in creating the 1829 x vector for the matrix-vector product y = Ax. 1830 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 1831 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 1832 . d_nz - number of block nonzeros per block row in diagonal portion of local 1833 submatrix (same for all local rows) 1834 . d_nnz - array containing the number of block nonzeros in the various block rows 1835 in the diagonal portion of the local submatrix (possibly different for each block 1836 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 1837 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1838 submatrix (same for all local rows). 1839 - o_nnz - array containing the number of nonzeros in the various block rows of the 1840 off-diagonal portion of the local submatrix (possibly different for 1841 each block row) or PETSC_NULL. 1842 1843 Output Parameter: 1844 . A - the matrix 1845 1846 Options Database Keys: 1847 . 
-mat_no_unroll - uses code that does not unroll the loops in the 1848 block calculations (much slower) 1849 . -mat_block_size - size of the blocks to use 1850 . -mat_mpi - use the parallel matrix data structures even on one processor 1851 (defaults to using SeqBAIJ format on one processor) 1852 1853 Notes: 1854 The user MUST specify either the local or global matrix dimensions 1855 (possibly both). 1856 1857 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1858 then it must be used on all processors that share the object for that argument. 1859 1860 Storage Information: 1861 For a square global matrix we define each processor's diagonal portion 1862 to be its local rows and the corresponding columns (a square submatrix); 1863 each processor's off-diagonal portion encompasses the remainder of the 1864 local matrix (a rectangular submatrix). 1865 1866 The user can specify preallocated storage for the diagonal part of 1867 the local submatrix with either d_nz or d_nnz (not both). Set 1868 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1869 memory allocation. Likewise, specify preallocated storage for the 1870 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1871 1872 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1873 the figure below we depict these three local rows and all columns (0-11). 1874 1875 .vb 1876 0 1 2 3 4 5 6 7 8 9 10 11 1877 ------------------- 1878 row 3 | o o o d d d o o o o o o 1879 row 4 | o o o d d d o o o o o o 1880 row 5 | o o o d d d o o o o o o 1881 ------------------- 1882 .ve 1883 1884 Thus, any entries in the d locations are stored in the d (diagonal) 1885 submatrix, and any entries in the o locations are stored in the 1886 o (off-diagonal) submatrix. Note that the d and the o submatrices are 1887 stored simply in the MATSEQBAIJ format for compressed row storage. 
1888 1889 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 1890 and o_nz should indicate the number of block nonzeros per row in the o matrix. 1891 In general, for PDE problems in which most nonzeros are near the diagonal, 1892 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1893 or you will get TERRIBLE performance; see the users' manual chapter on 1894 matrices. 1895 1896 Level: intermediate 1897 1898 .keywords: matrix, block, aij, compressed row, sparse, parallel 1899 1900 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIAIJ() 1901 @*/ 1902 int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N, 1903 int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 1904 { 1905 Mat B; 1906 Mat_MPIBAIJ *b; 1907 int ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size,flg; 1908 int flag1 = 0,flag2 = 0; 1909 1910 PetscFunctionBegin; 1911 if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid block size specified, must be positive"); 1912 1913 MPI_Comm_size(comm,&size); 1914 ierr = OptionsHasName(PETSC_NULL,"-mat_mpibaij",&flag1); CHKERRQ(ierr); 1915 ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2); CHKERRQ(ierr); 1916 if (!flag1 && !flag2 && size == 1) { 1917 if (M == PETSC_DECIDE) M = m; 1918 if (N == PETSC_DECIDE) N = n; 1919 ierr = MatCreateSeqBAIJ(comm,bs,M,N,d_nz,d_nnz,A); CHKERRQ(ierr); 1920 PetscFunctionReturn(0); 1921 } 1922 1923 *A = 0; 1924 PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",comm,MatDestroy,MatView); 1925 PLogObjectCreate(B); 1926 B->data = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b); 1927 PetscMemzero(b,sizeof(Mat_MPIBAIJ)); 1928 PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps)); 1929 1930 B->ops->destroy = MatDestroy_MPIBAIJ; 1931 B->ops->view = MatView_MPIBAIJ; 1932 B->mapping = 0; 1933 B->factor = 0; 1934 B->assembled = PETSC_FALSE; 1935 1936 B->insertmode = NOT_SET_VALUES; 1937 MPI_Comm_rank(comm,&b->rank); 1938 
MPI_Comm_size(comm,&b->size); 1939 1940 if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) { 1941 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 1942 } 1943 if ( M == PETSC_DECIDE && m == PETSC_DECIDE) { 1944 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either M or m should be specified"); 1945 } 1946 if ( N == PETSC_DECIDE && n == PETSC_DECIDE) { 1947 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either N or n should be specified"); 1948 } 1949 if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE; 1950 if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE; 1951 1952 if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 1953 work[0] = m; work[1] = n; 1954 mbs = m/bs; nbs = n/bs; 1955 ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr); 1956 if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;} 1957 if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;} 1958 } 1959 if (m == PETSC_DECIDE) { 1960 Mbs = M/bs; 1961 if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global rows must be divisible by blocksize"); 1962 mbs = Mbs/b->size + ((Mbs % b->size) > b->rank); 1963 m = mbs*bs; 1964 } 1965 if (n == PETSC_DECIDE) { 1966 Nbs = N/bs; 1967 if (Nbs*bs != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global cols must be divisible by blocksize"); 1968 nbs = Nbs/b->size + ((Nbs % b->size) > b->rank); 1969 n = nbs*bs; 1970 } 1971 if (mbs*bs != m || nbs*bs != n) { 1972 SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of local rows, cols must be divisible by blocksize"); 1973 } 1974 1975 b->m = m; B->m = m; 1976 b->n = n; B->n = n; 1977 b->N = N; B->N = N; 1978 b->M = M; B->M = M; 1979 b->bs = bs; 1980 b->bs2 = bs*bs; 1981 b->mbs = mbs; 1982 b->nbs = nbs; 1983 b->Mbs = Mbs; 1984 b->Nbs = Nbs; 1985 1986 /* the information in the maps duplicates the information computed below, eventually 1987 we should remove the duplicate information that is not contained in the maps */ 1988 ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr); 1989 ierr = 
MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr); 1990 1991 /* build local table of row and column ownerships */ 1992 b->rowners = (int *) PetscMalloc(3*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners); 1993 PLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 1994 b->cowners = b->rowners + b->size + 2; 1995 b->rowners_bs = b->cowners + b->size + 2; 1996 ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1997 b->rowners[0] = 0; 1998 for ( i=2; i<=b->size; i++ ) { 1999 b->rowners[i] += b->rowners[i-1]; 2000 } 2001 for ( i=0; i<=b->size; i++ ) { 2002 b->rowners_bs[i] = b->rowners[i]*bs; 2003 } 2004 b->rstart = b->rowners[b->rank]; 2005 b->rend = b->rowners[b->rank+1]; 2006 b->rstart_bs = b->rstart * bs; 2007 b->rend_bs = b->rend * bs; 2008 2009 ierr = MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2010 b->cowners[0] = 0; 2011 for ( i=2; i<=b->size; i++ ) { 2012 b->cowners[i] += b->cowners[i-1]; 2013 } 2014 b->cstart = b->cowners[b->rank]; 2015 b->cend = b->cowners[b->rank+1]; 2016 b->cstart_bs = b->cstart * bs; 2017 b->cend_bs = b->cend * bs; 2018 2019 2020 if (d_nz == PETSC_DEFAULT) d_nz = 5; 2021 ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr); 2022 PLogObjectParent(B,b->A); 2023 if (o_nz == PETSC_DEFAULT) o_nz = 0; 2024 ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr); 2025 PLogObjectParent(B,b->B); 2026 2027 /* build cache for off array entries formed */ 2028 ierr = MatStashCreate_Private(comm,1,&B->stash); CHKERRQ(ierr); 2029 ierr = MatStashCreate_Private(comm,bs,&B->bstash); CHKERRQ(ierr); 2030 b->donotstash = 0; 2031 b->colmap = 0; 2032 b->garray = 0; 2033 b->roworiented = 1; 2034 2035 /* stuff used in block assembly */ 2036 b->barray = 0; 2037 2038 /* stuff used for matrix vector multiply */ 2039 b->lvec = 0; 2040 b->Mvctx = 0; 2041 2042 /* stuff for MatGetRow() */ 2043 b->rowindices = 0; 2044 b->rowvalues = 0; 
2045 b->getrowactive = PETSC_FALSE; 2046 2047 /* hash table stuff */ 2048 b->ht = 0; 2049 b->hd = 0; 2050 b->ht_size = 0; 2051 b->ht_flag = 0; 2052 b->ht_fact = 0; 2053 b->ht_total_ct = 0; 2054 b->ht_insert_ct = 0; 2055 2056 *A = B; 2057 ierr = OptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg); CHKERRQ(ierr); 2058 if (flg) { 2059 double fact = 1.39; 2060 ierr = MatSetOption(B,MAT_USE_HASH_TABLE); CHKERRQ(ierr); 2061 ierr = OptionsGetDouble(PETSC_NULL,"-mat_use_hash_table",&fact,&flg); CHKERRQ(ierr); 2062 if (fact <= 1.0) fact = 1.39; 2063 ierr = MatMPIBAIJSetHashTableFactor(B,fact); CHKERRQ(ierr); 2064 PLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact); 2065 } 2066 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C", 2067 "MatGetDiagonalBlock_MPIBAIJ", 2068 (void*)MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 2069 PetscFunctionReturn(0); 2070 } 2071 2072 #undef __FUNC__ 2073 #define __FUNC__ "MatDuplicate_MPIBAIJ" 2074 static int MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2075 { 2076 Mat mat; 2077 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data; 2078 int ierr, len=0, flg; 2079 2080 PetscFunctionBegin; 2081 *newmat = 0; 2082 PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",matin->comm,MatDestroy,MatView); 2083 PLogObjectCreate(mat); 2084 mat->data = (void *) (a = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(a); 2085 PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps)); 2086 mat->ops->destroy = MatDestroy_MPIBAIJ; 2087 mat->ops->view = MatView_MPIBAIJ; 2088 mat->factor = matin->factor; 2089 mat->assembled = PETSC_TRUE; 2090 mat->insertmode = NOT_SET_VALUES; 2091 2092 a->m = mat->m = oldmat->m; 2093 a->n = mat->n = oldmat->n; 2094 a->M = mat->M = oldmat->M; 2095 a->N = mat->N = oldmat->N; 2096 2097 a->bs = oldmat->bs; 2098 a->bs2 = oldmat->bs2; 2099 a->mbs = oldmat->mbs; 2100 a->nbs = oldmat->nbs; 2101 a->Mbs = oldmat->Mbs; 2102 a->Nbs = oldmat->Nbs; 2103 2104 a->rstart = 
oldmat->rstart; 2105 a->rend = oldmat->rend; 2106 a->cstart = oldmat->cstart; 2107 a->cend = oldmat->cend; 2108 a->size = oldmat->size; 2109 a->rank = oldmat->rank; 2110 a->donotstash = oldmat->donotstash; 2111 a->roworiented = oldmat->roworiented; 2112 a->rowindices = 0; 2113 a->rowvalues = 0; 2114 a->getrowactive = PETSC_FALSE; 2115 a->barray = 0; 2116 a->rstart_bs = oldmat->rstart_bs; 2117 a->rend_bs = oldmat->rend_bs; 2118 a->cstart_bs = oldmat->cstart_bs; 2119 a->cend_bs = oldmat->cend_bs; 2120 2121 /* hash table stuff */ 2122 a->ht = 0; 2123 a->hd = 0; 2124 a->ht_size = 0; 2125 a->ht_flag = oldmat->ht_flag; 2126 a->ht_fact = oldmat->ht_fact; 2127 a->ht_total_ct = 0; 2128 a->ht_insert_ct = 0; 2129 2130 2131 a->rowners = (int *) PetscMalloc(3*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 2132 PLogObjectMemory(mat,3*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 2133 a->cowners = a->rowners + a->size + 2; 2134 a->rowners_bs = a->cowners + a->size + 2; 2135 PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int)); 2136 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash); CHKERRQ(ierr); 2137 ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash); CHKERRQ(ierr); 2138 if (oldmat->colmap) { 2139 #if defined (USE_CTABLE) 2140 ierr = TableCreateCopy(oldmat->colmap,&a->colmap); CHKERRQ(ierr); 2141 #else 2142 a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap); 2143 PLogObjectMemory(mat,(a->Nbs)*sizeof(int)); 2144 PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int)); 2145 #endif 2146 } else a->colmap = 0; 2147 if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) { 2148 a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray); 2149 PLogObjectMemory(mat,len*sizeof(int)); 2150 PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); 2151 } else a->garray = 0; 2152 2153 ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 2154 PLogObjectParent(mat,a->lvec); 2155 ierr 
= VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 2156 2157 PLogObjectParent(mat,a->Mvctx); 2158 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A); CHKERRQ(ierr); 2159 PLogObjectParent(mat,a->A); 2160 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B); CHKERRQ(ierr); 2161 PLogObjectParent(mat,a->B); 2162 ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr); 2163 ierr = FListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr); 2164 if (flg) { 2165 ierr = MatPrintHelp(mat); CHKERRQ(ierr); 2166 } 2167 *newmat = mat; 2168 PetscFunctionReturn(0); 2169 } 2170 2171 #include "sys.h" 2172 2173 #undef __FUNC__ 2174 #define __FUNC__ "MatLoad_MPIBAIJ" 2175 int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat) 2176 { 2177 Mat A; 2178 int i, nz, ierr, j,rstart, rend, fd; 2179 Scalar *vals,*buf; 2180 MPI_Comm comm = ((PetscObject)viewer)->comm; 2181 MPI_Status status; 2182 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 2183 int *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf; 2184 int flg,tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows; 2185 int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2186 int dcount,kmax,k,nzcount,tmp; 2187 2188 PetscFunctionBegin; 2189 ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr); 2190 2191 MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 2192 if (!rank) { 2193 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 2194 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr); 2195 if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object"); 2196 if (header[3] < 0) { 2197 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as MPIBAIJ"); 2198 } 2199 } 2200 2201 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 2202 M = header[1]; N = header[2]; 2203 2204 if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices"); 2205 2206 /* 2207 This code adds extra rows 
to make sure the number of rows is 2208 divisible by the blocksize 2209 */ 2210 Mbs = M/bs; 2211 extra_rows = bs - M + bs*(Mbs); 2212 if (extra_rows == bs) extra_rows = 0; 2213 else Mbs++; 2214 if (extra_rows &&!rank) { 2215 PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n"); 2216 } 2217 2218 /* determine ownership of all rows */ 2219 mbs = Mbs/size + ((Mbs % size) > rank); 2220 m = mbs * bs; 2221 rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int)); CHKPTRQ(rowners); 2222 browners = rowners + size + 1; 2223 ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2224 rowners[0] = 0; 2225 for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1]; 2226 for ( i=0; i<=size; i++ ) browners[i] = rowners[i]*bs; 2227 rstart = rowners[rank]; 2228 rend = rowners[rank+1]; 2229 2230 /* distribute row lengths to all processors */ 2231 locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) ); CHKPTRQ(locrowlens); 2232 if (!rank) { 2233 rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) ); CHKPTRQ(rowlengths); 2234 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr); 2235 for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 2236 sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 2237 for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i]; 2238 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2239 PetscFree(sndcounts); 2240 } else { 2241 ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm);CHKERRQ(ierr); 2242 } 2243 2244 if (!rank) { 2245 /* calculate the number of nonzeros on each processor */ 2246 procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 2247 PetscMemzero(procsnz,size*sizeof(int)); 2248 for ( i=0; i<size; i++ ) { 2249 for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) { 2250 procsnz[i] += rowlengths[j]; 2251 } 2252 } 2253 PetscFree(rowlengths); 2254 2255 /* 
determine max buffer needed and allocate it */ 2256 maxnz = 0; 2257 for ( i=0; i<size; i++ ) { 2258 maxnz = PetscMax(maxnz,procsnz[i]); 2259 } 2260 cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 2261 2262 /* read in my part of the matrix column indices */ 2263 nz = procsnz[0]; 2264 ibuf = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf); 2265 mycols = ibuf; 2266 if (size == 1) nz -= extra_rows; 2267 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr); 2268 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2269 2270 /* read in every ones (except the last) and ship off */ 2271 for ( i=1; i<size-1; i++ ) { 2272 nz = procsnz[i]; 2273 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr); 2274 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 2275 } 2276 /* read in the stuff for the last proc */ 2277 if ( size != 1 ) { 2278 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2279 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr); 2280 for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i; 2281 ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr); 2282 } 2283 PetscFree(cols); 2284 } else { 2285 /* determine buffer space needed for message */ 2286 nz = 0; 2287 for ( i=0; i<m; i++ ) { 2288 nz += locrowlens[i]; 2289 } 2290 ibuf = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf); 2291 mycols = ibuf; 2292 /* receive message of column indices*/ 2293 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2294 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2295 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 2296 } 2297 2298 /* loop over local rows, determining number of off diagonal entries */ 2299 dlens = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) ); CHKPTRQ(dlens); 2300 odlens = dlens + (rend-rstart); 2301 mask = (int *) PetscMalloc( 3*Mbs*sizeof(int) ); CHKPTRQ(mask); 2302 
PetscMemzero(mask,3*Mbs*sizeof(int)); 2303 masked1 = mask + Mbs; 2304 masked2 = masked1 + Mbs; 2305 rowcount = 0; nzcount = 0; 2306 for ( i=0; i<mbs; i++ ) { 2307 dcount = 0; 2308 odcount = 0; 2309 for ( j=0; j<bs; j++ ) { 2310 kmax = locrowlens[rowcount]; 2311 for ( k=0; k<kmax; k++ ) { 2312 tmp = mycols[nzcount++]/bs; 2313 if (!mask[tmp]) { 2314 mask[tmp] = 1; 2315 if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp; 2316 else masked1[dcount++] = tmp; 2317 } 2318 } 2319 rowcount++; 2320 } 2321 2322 dlens[i] = dcount; 2323 odlens[i] = odcount; 2324 2325 /* zero out the mask elements we set */ 2326 for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0; 2327 for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0; 2328 } 2329 2330 /* create our matrix */ 2331 ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat); 2332 CHKERRQ(ierr); 2333 A = *newmat; 2334 MatSetOption(A,MAT_COLUMNS_SORTED); 2335 2336 if (!rank) { 2337 buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(buf); 2338 /* read in my part of the matrix numerical values */ 2339 nz = procsnz[0]; 2340 vals = buf; 2341 mycols = ibuf; 2342 if (size == 1) nz -= extra_rows; 2343 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 2344 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2345 2346 /* insert into matrix */ 2347 jj = rstart*bs; 2348 for ( i=0; i<m; i++ ) { 2349 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2350 mycols += locrowlens[i]; 2351 vals += locrowlens[i]; 2352 jj++; 2353 } 2354 /* read in other processors (except the last one) and ship out */ 2355 for ( i=1; i<size-1; i++ ) { 2356 nz = procsnz[i]; 2357 vals = buf; 2358 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 2359 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2360 } 2361 /* the last proc */ 2362 if ( size != 1 ){ 2363 nz = procsnz[i] - extra_rows; 2364 vals = buf; 2365 ierr = 
PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 2366 for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0; 2367 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 2368 } 2369 PetscFree(procsnz); 2370 } else { 2371 /* receive numeric values */ 2372 buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(buf); 2373 2374 /* receive message of values*/ 2375 vals = buf; 2376 mycols = ibuf; 2377 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2378 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2379 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 2380 2381 /* insert into matrix */ 2382 jj = rstart*bs; 2383 for ( i=0; i<m; i++ ) { 2384 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2385 mycols += locrowlens[i]; 2386 vals += locrowlens[i]; 2387 jj++; 2388 } 2389 } 2390 PetscFree(locrowlens); 2391 PetscFree(buf); 2392 PetscFree(ibuf); 2393 PetscFree(rowners); 2394 PetscFree(dlens); 2395 PetscFree(mask); 2396 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 2397 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 2398 PetscFunctionReturn(0); 2399 } 2400 2401 2402 2403 #undef __FUNC__ 2404 #define __FUNC__ "MatMPIBAIJSetHashTableFactor" 2405 /*@ 2406 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2407 2408 Input Parameters: 2409 . mat - the matrix 2410 . 
fact - factor 2411 2412 Collective on Mat 2413 2414 Level: advanced 2415 2416 Notes: 2417 This can also be set by the command line option: -mat_use_hash_table fact 2418 2419 .keywords: matrix, hashtable, factor, HT 2420 2421 .seealso: MatSetOption() 2422 @*/ 2423 int MatMPIBAIJSetHashTableFactor(Mat mat,double fact) 2424 { 2425 Mat_MPIBAIJ *baij; 2426 2427 PetscFunctionBegin; 2428 PetscValidHeaderSpecific(mat,MAT_COOKIE); 2429 if (mat->type != MATMPIBAIJ) { 2430 SETERRQ(PETSC_ERR_ARG_WRONG,1,"Incorrect matrix type. Use MPIBAIJ only."); 2431 } 2432 baij = (Mat_MPIBAIJ*) mat->data; 2433 baij->ht_fact = fact; 2434 PetscFunctionReturn(0); 2435 } 2436