1 #ifdef PETSC_RCS_HEADER 2 static char vcid[] = "$Id: mpibaij.c,v 1.164 1999/03/18 00:35:48 balay Exp bsmith $"; 3 #endif 4 5 #include "src/mat/impls/baij/mpi/mpibaij.h" /*I "mat.h" I*/ 6 #include "src/vec/vecimpl.h" 7 8 extern int MatSetUpMultiply_MPIBAIJ(Mat); 9 extern int DisAssemble_MPIBAIJ(Mat); 10 extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int); 11 extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **); 12 extern int MatGetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *); 13 extern int MatSetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *,InsertMode); 14 extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 15 extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 16 extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 17 extern int MatPrintHelp_SeqBAIJ(Mat); 18 19 /* 20 Local utility routine that creates a mapping from the global column 21 number to the local number in the off-diagonal part of the local 22 storage of the matrix. This is done in a non scable way since the 23 length of colmap equals the global matrix length. 
*/
#undef __FUNC__
#define __FUNC__ "CreateColmap_MPIBAIJ_Private"
/*
   CreateColmap_MPIBAIJ_Private - Builds baij->colmap, mapping a global block
   column number to 1 + its first local column in the off-diagonal part B.
   Entries are stored shifted by +1 so that 0 can serve as "not present"
   (callers subtract 1 and treat negative results as missing).
   With USE_CTABLE a hash table is used instead of the O(Nbs) dense array.
*/
static int CreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data;
  int         nbs = B->nbs,i,bs=B->bs;
#if defined (USE_CTABLE)
  int         ierr;
#endif

  PetscFunctionBegin;
#if defined (USE_CTABLE)
  /* table sized to a fraction of the local block count; keys/values are +1 shifted */
  ierr = TableCreate(baij->nbs/5,&baij->colmap); CHKERRQ(ierr);
  for ( i=0; i<nbs; i++ ){
    ierr = TableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
  }
#else
  /* dense array over ALL global block columns -- not scalable (see header note) */
  baij->colmap = (int *) PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap);
  PLogObjectMemory(mat,baij->Nbs*sizeof(int));
  PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));
  for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}

/* number of extra block entries grown per row when a row runs out of space */
#define CHUNKSIZE 10

/*
   MatSetValues_SeqBAIJ_A_Private - Inlined single-entry insertion into the
   on-diagonal SeqBAIJ part (local vars rp,ap,... supplied by the caller).
   Binary-search (narrow to <=3, then linear) for block column bcol; adds or
   overwrites in place, optionally reallocating the row by CHUNKSIZE blocks.
*/
#define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs; \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for ( _i=low; _i<high; _i++ ) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else                    *bap  = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
      Scalar *new_a; \
 \
      if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
 \
      /* malloc new storage space */ \
      len = \
new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); \ 87 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 88 new_j = (int *) (new_a + bs2*new_nz); \ 89 new_i = new_j + new_nz; \ 90 \ 91 /* copy over old data into new slots */ \ 92 for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} \ 93 for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 94 PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); \ 95 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \ 96 PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, \ 97 len*sizeof(int)); \ 98 PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); \ 99 PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \ 100 PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \ 101 aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar)); \ 102 /* free up old matrix storage */ \ 103 PetscFree(a->a); \ 104 if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \ 105 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 106 a->singlemalloc = 1; \ 107 \ 108 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 109 rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \ 110 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \ 111 a->maxnz += bs2*CHUNKSIZE; \ 112 a->reallocs++; \ 113 a->nz++; \ 114 } \ 115 N = nrow++ - 1; \ 116 /* shift up all the later entries in this row */ \ 117 for ( ii=N; ii>=_i; ii-- ) { \ 118 rp[ii+1] = rp[ii]; \ 119 PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \ 120 } \ 121 if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar)); \ 122 rp[_i] = bcol; \ 123 ap[bs2*_i + bs*cidx + ridx] = value; \ 124 a_noinsert:; \ 125 ailen[brow] = nrow; \ 126 } 127 128 #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 129 { \ 130 \ 131 brow = row/bs; \ 132 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 133 rmax = bimax[brow]; nrow = bilen[brow]; \ 134 bcol = col/bs; \ 135 ridx = row % bs; cidx = col % bs; \ 136 low = 0; high = nrow; \ 137 while (high-low > 3) { \ 
138 t = (low+high)/2; \ 139 if (rp[t] > bcol) high = t; \ 140 else low = t; \ 141 } \ 142 for ( _i=low; _i<high; _i++ ) { \ 143 if (rp[_i] > bcol) break; \ 144 if (rp[_i] == bcol) { \ 145 bap = ap + bs2*_i + bs*cidx + ridx; \ 146 if (addv == ADD_VALUES) *bap += value; \ 147 else *bap = value; \ 148 goto b_noinsert; \ 149 } \ 150 } \ 151 if (b->nonew == 1) goto b_noinsert; \ 152 else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 153 if (nrow >= rmax) { \ 154 /* there is no extra room in row, therefore enlarge */ \ 155 int new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 156 Scalar *new_a; \ 157 \ 158 if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 159 \ 160 /* malloc new storage space */ \ 161 len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(b->mbs+1)*sizeof(int); \ 162 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 163 new_j = (int *) (new_a + bs2*new_nz); \ 164 new_i = new_j + new_nz; \ 165 \ 166 /* copy over old data into new slots */ \ 167 for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = bi[ii];} \ 168 for ( ii=brow+1; ii<b->mbs+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 169 PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int)); \ 170 len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \ 171 PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow, \ 172 len*sizeof(int)); \ 173 PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(Scalar)); \ 174 PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \ 175 PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \ 176 ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(Scalar)); \ 177 /* free up old matrix storage */ \ 178 PetscFree(b->a); \ 179 if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \ 180 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 181 b->singlemalloc = 1; \ 182 \ 183 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 184 rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \ 185 
PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \ 186 b->maxnz += bs2*CHUNKSIZE; \ 187 b->reallocs++; \ 188 b->nz++; \ 189 } \ 190 N = nrow++ - 1; \ 191 /* shift up all the later entries in this row */ \ 192 for ( ii=N; ii>=_i; ii-- ) { \ 193 rp[ii+1] = rp[ii]; \ 194 PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \ 195 } \ 196 if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar)); \ 197 rp[_i] = bcol; \ 198 ap[bs2*_i + bs*cidx + ridx] = value; \ 199 b_noinsert:; \ 200 bilen[brow] = nrow; \ 201 } 202 203 #undef __FUNC__ 204 #define __FUNC__ "MatSetValues_MPIBAIJ" 205 int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 206 { 207 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 208 Scalar value; 209 int ierr,i,j,row,col; 210 int roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ; 211 int rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs; 212 int cend_orig=baij->cend_bs,bs=baij->bs; 213 214 /* Some Variables required in the macro */ 215 Mat A = baij->A; 216 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) (A)->data; 217 int *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 218 Scalar *aa=a->a; 219 220 Mat B = baij->B; 221 Mat_SeqBAIJ *b = (Mat_SeqBAIJ *) (B)->data; 222 int *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 223 Scalar *ba=b->a; 224 225 int *rp,ii,nrow,_i,rmax,N,brow,bcol; 226 int low,high,t,ridx,cidx,bs2=a->bs2; 227 Scalar *ap,*bap; 228 229 PetscFunctionBegin; 230 for ( i=0; i<m; i++ ) { 231 if (im[i] < 0) continue; 232 #if defined(USE_PETSC_BOPT_g) 233 if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 234 #endif 235 if (im[i] >= rstart_orig && im[i] < rend_orig) { 236 row = im[i] - rstart_orig; 237 for ( j=0; j<n; j++ ) { 238 if (in[j] >= cstart_orig && in[j] < cend_orig){ 239 col = in[j] - cstart_orig; 240 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 241 MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 242 /* ierr = 
MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 243 } else if (in[j] < 0) continue; 244 #if defined(USE_PETSC_BOPT_g) 245 else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Col too large");} 246 #endif 247 else { 248 if (mat->was_assembled) { 249 if (!baij->colmap) { 250 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 251 } 252 #if defined (USE_CTABLE) 253 ierr = TableFind(baij->colmap, in[j]/bs + 1,&col); CHKERRQ(ierr); 254 col = col - 1 + in[j]%bs; 255 #else 256 col = baij->colmap[in[j]/bs] - 1 + in[j]%bs; 257 #endif 258 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 259 ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 260 col = in[j]; 261 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 262 B = baij->B; 263 b = (Mat_SeqBAIJ *) (B)->data; 264 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 265 ba=b->a; 266 } 267 } else col = in[j]; 268 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 269 MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 270 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 271 } 272 } 273 } else { 274 if ( !baij->donotstash) { 275 if (roworiented) { 276 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 277 } else { 278 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 279 } 280 } 281 } 282 } 283 PetscFunctionReturn(0); 284 } 285 286 #undef __FUNC__ 287 #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ" 288 int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 289 { 290 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 291 Scalar *value,*barray=baij->barray; 292 int ierr,i,j,ii,jj,row,col; 293 int roworiented = baij->roworiented,rstart=baij->rstart ; 294 int rend=baij->rend,cstart=baij->cstart,stepval; 295 int cend=baij->cend,bs=baij->bs,bs2=baij->bs2; 296 297 if(!barray) { 298 baij->barray = barray = (Scalar*) 
PetscMalloc(bs2*sizeof(Scalar)); CHKPTRQ(barray);
  }

  /* stepval = distance (in Scalars) between consecutive rows of one block
     inside the caller's v array, minus the bs entries consumed per row */
  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for ( i=0; i<m; i++ ) {
    if (im[i] < 0) continue;
#if defined(USE_PETSC_BOPT_g)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large, row %d max %d",im[i],baij->Mbs);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for ( j=0; j<n; j++ ) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = v + i*bs2;
        } else if((!roworiented) && (m == 1)) {
          barray = v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for ( ii=0; ii<bs; ii++,value+=stepval ) {
            for (jj=0; jj<bs; jj++ ) {
              *barray++ = *value++;
            }
          }
          barray -=bs2;   /* rewind to the start of the copied block */
        }

        if (in[j] >= cstart && in[j] < cend){
          /* block column in the diagonal part */
          col = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;
#if defined(USE_PETSC_BOPT_g)
        else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large, col %d max %d",in[j],baij->Nbs);}
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(USE_PETSC_BOPT_g)
            /* debug check: colmap entries must be block aligned */
#if defined (USE_CTABLE)
            { int data;
              ierr = TableFind(baij->colmap,in[j]+1,&data); CHKERRQ(ierr);
              if((data - 1) % bs) {SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");}
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) {SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");}
#endif
#endif
#if defined (USE_CTABLE)
            ierr = TableFind(baij->colmap,in[j]+1,&col); CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              /* unknown off-process block column: disassemble and retry with global index */
              ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
              col  = in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      /* non-local block row: stash for assembly */
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
/* multiplicative hashing with the golden ratio; tmp is caller-supplied
   scratch since the macro needs a double-valued intermediate */
#define HASH_KEY 0.6180339887
/* #define HASH1(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
/* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
#undef __FUNC__
#define __FUNC__ "MatSetValues_MPIBAIJ_HT"
/*
   MatSetValues_MPIBAIJ_HT - Scalar-indexed insertion via the hash table
   built by MatCreateHashTable_MPIBAIJ_Private. The nonzero structure is
   frozen: a key with no table entry is an error. Lookup probes linearly
   from the hashed slot, wrapping once around the table.
*/
int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr,i,j,row,col;
  int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
  int         rend_orig=baij->rend_bs,Nbs=baij->Nbs;
  int         h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx;
  double      tmp;
  Scalar      ** HD = baij->hd,value;
#if defined(USE_PETSC_BOPT_g)
  int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  for ( i=0; i<m; i++ ) {
#if defined(USE_PETSC_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for ( j=0; j<n; j++ ) {
        col = in[j];
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        /* Look up into the Hash Table; key encodes (block row, block col) */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1 = HASH(size,key,tmp);

        idx = h1;
#if \
defined(USE_PETSC_BOPT_g)
        /* debug build additionally counts probes for the assembly-time report */
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#else
        if (HT[idx] != key) {
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#endif
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(USE_PETSC_BOPT_g)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatSetValuesBlocked_MPIBAIJ_HT"
/*
   MatSetValuesBlocked_MPIBAIJ_HT - Block-indexed insertion via the hash
   table (frozen nonzero structure). Each (block row, block col) key maps to
   the block's storage in HD[]; values are scattered from the caller's
   row- or column-oriented layout into column-major block storage.
*/
int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr,i,j,ii,jj,row,col;
  int         roworiented = baij->roworiented,rstart=baij->rstart ;
  int         rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2;
  int         h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  double      tmp;
  Scalar      ** HD = baij->hd,*value,*v_t,*baij_a;
#if defined(USE_PETSC_BOPT_g)
  int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for ( i=0; i<m; i++ ) {
#if defined(USE_PETSC_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    row = im[i];
    v_t = v + i*bs2;
    if (row >= rstart && row < rend) {
      for ( j=0; j<n; j++ ) {
        col = in[j];

        /* Look up into the Hash Table */
        key = row*Nbs+col+1;
        h1 = HASH(size,key,tmp);

        idx = h1;
#if defined(USE_PETSC_BOPT_g)
        total_ct++;
        insert_ct++;
        if (HT[idx] != key) {
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#else
        if (HT[idx] != key) {
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#endif
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          if (addv == ADD_VALUES) {
            for ( ii=0; ii<bs; ii++,value+=stepval) {
              for ( jj=ii; jj<bs2; jj+=bs ) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for ( ii=0; ii<bs; ii++,value+=stepval) {
              for ( jj=ii; jj<bs2; jj+=bs ) {
                baij_a[jj] = *value++;
              }
            }
          }
        } else {
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) {
              for ( jj=0; jj<bs; jj++ ) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) {
              for ( jj=0; jj<bs; jj++ ) {
                baij_a[jj] = *value++;
              }
            }
          }
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(USE_PETSC_BOPT_g)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetValues_MPIBAIJ"
/*
   MatGetValues_MPIBAIJ - Retrieves entries by global (scalar) indices.
   Only locally owned rows are supported; local columns are read from A,
   others translated through colmap and read from B, with entries that are
   not present returned as 0.0.
*/
int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs;
  int         bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col,data;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
        if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
        if (idxn[j] >= bscstart && idxn[j] < bscend){
          col = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (USE_CTABLE)
          ierr = TableFind(baij->colmap,idxn[j]/bs+1,&data); CHKERRQ(ierr);
          data --;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          /* colmap miss (or stale entry) means the value is structurally zero */
          if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"Only \
local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatNorm_MPIBAIJ"
/*
   MatNorm_MPIBAIJ - Computes the matrix norm. Uniprocessor case defers to
   the sequential A part; in parallel only NORM_FROBENIUS is implemented:
   each process sums squares over its A and B parts and the result is
   combined with MPI_Allreduce.
*/
int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data;
  int         ierr, i,bs2=baij->bs2;
  double      sum = 0.0;
  Scalar      *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,norm); CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz*bs2; i++ ) {
#if defined(USE_PETSC_COMPLEX)
        sum += PetscReal(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz*bs2; i++ ) {
#if defined(USE_PETSC_COMPLEX)
        sum += PetscReal(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr  = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet");
    }
  }
  PetscFunctionReturn(0);
}


/*
  Creates the hash table, and sets the table
  This table is created only once.
  If new entried need to be added to the matrix
  then the hash table has to be destroyed and
  recreated.
*/
#undef __FUNC__
#define __FUNC__ "MatCreateHashTable_MPIBAIJ_Private"
/*
   MatCreateHashTable_MPIBAIJ_Private - Builds the (key -> block address)
   open-addressed hash table over all current nonzero blocks of A and B.
   Table size = factor * local nonzero block count; keys are
   row*Nbs + col + 1 (the +1 keeps 0 free as the empty-slot marker).
   Returns immediately if a table already exists.
*/
int MatCreateHashTable_MPIBAIJ_Private(Mat mat,double factor)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  Mat         A = baij->A, B=baij->B;
  Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data, *b=(Mat_SeqBAIJ *)B->data;
  int         i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  int         size,bs2=baij->bs2,rstart=baij->rstart;
  int         cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  int         *HT,key;
  Scalar      **HD;
  double      tmp;
#if defined(USE_PETSC_BOPT_g)
  int         ct=0,max=0;
#endif

  PetscFunctionBegin;
  baij->ht_size=(int)(factor*nz);
  size = baij->ht_size;

  if (baij->ht) {
    PetscFunctionReturn(0);
  }

  /* Allocate Memory for Hash Table: one malloc holds HD (pointers) then HT (keys) */
  baij->hd = (Scalar**)PetscMalloc((size)*(sizeof(int)+sizeof(Scalar*))+1); CHKPTRQ(baij->hd);
  baij->ht = (int*)(baij->hd + size);
  HD       = baij->hd;
  HT       = baij->ht;


  PetscMemzero(HD,size*(sizeof(int)+sizeof(Scalar*)));


  /* Loop Over A */
  for ( i=0; i<a->mbs; i++ ) {
    for ( j=ai[i]; j<ai[i+1]; j++ ) {
      row = i+rstart;
      col = aj[j]+cstart;

      key = row*Nbs + col + 1;
      h1  = HASH(size,key,tmp);
      /* linear probe for the first empty slot
         NOTE(review): HT is int so comparing against 0.0 works but should be 0 */
      for ( k=0; k<size; k++ ){
        if (HT[(h1+k)%size] == 0.0) {
          HT[(h1+k)%size] = key;
          HD[(h1+k)%size] = a->a + j*bs2;
          break;
#if defined(USE_PETSC_BOPT_g)
        } else {
          ct++;
#endif
        }
      }
#if defined(USE_PETSC_BOPT_g)
      if (k> max) max = k;
#endif
    }
  }
  /* Loop Over B: global block column comes from garray */
  for ( i=0; i<b->mbs; i++ ) {
    for ( j=bi[i]; j<bi[i+1]; j++ ) {
      row = i+rstart;
      col = garray[bj[j]];
      key = row*Nbs + col + 1;
      h1  = HASH(size,key,tmp);
      for ( k=0; k<size; k++ ){
        if (HT[(h1+k)%size] == 0.0) {
          HT[(h1+k)%size] = key;
          HD[(h1+k)%size] = b->a + j*bs2;
          break;
#if defined(USE_PETSC_BOPT_g)
        } else {
          ct++;
#endif
        }
      }
#if \
defined(USE_PETSC_BOPT_g) 750 if (k> max) max = k; 751 #endif 752 } 753 } 754 755 /* Print Summary */ 756 #if defined(USE_PETSC_BOPT_g) 757 for ( i=0,j=0; i<size; i++) 758 if (HT[i]) {j++;} 759 PLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n", 760 (j== 0)? 0.0:((double)(ct+j))/j,max); 761 #endif 762 PetscFunctionReturn(0); 763 } 764 765 #undef __FUNC__ 766 #define __FUNC__ "MatAssemblyBegin_MPIBAIJ" 767 int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 768 { 769 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 770 int ierr,nstash,reallocs; 771 InsertMode addv; 772 773 PetscFunctionBegin; 774 if (baij->donotstash) { 775 PetscFunctionReturn(0); 776 } 777 778 /* make sure all processors are either in INSERTMODE or ADDMODE */ 779 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 780 if (addv == (ADD_VALUES|INSERT_VALUES)) { 781 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added"); 782 } 783 mat->insertmode = addv; /* in case this processor had no cache */ 784 785 ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs); CHKERRQ(ierr); 786 ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners); CHKERRQ(ierr); 787 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs); CHKERRQ(ierr); 788 PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 789 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs); CHKERRQ(ierr); 790 PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 791 PetscFunctionReturn(0); 792 } 793 794 #undef __FUNC__ 795 #define __FUNC__ "MatAssemblyEnd_MPIBAIJ" 796 int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 797 { 798 Mat_MPIBAIJ *baij=(Mat_MPIBAIJ *) mat->data; 799 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data; 800 int i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2; 801 int 
*row,*col,other_disassembled,r1,r2,r3;
  Scalar      *val;
  InsertMode  addv = mat->insertmode;

  PetscFunctionBegin;
  if (!baij->donotstash) {
    /* drain the scalar stash, batching consecutive values of the same row */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg); CHKERRQ(ierr);
      if (!flg) break;

      for ( i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv); CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash); CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = b->roworiented;
    baij->roworiented = 0;
    a->roworiented    = 0;
    b->roworiented    = 0;
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg); CHKERRQ(ierr);
      if (!flg) break;

      for ( i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv); CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash); CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    b->roworiented    = r3;
  }

  ierr = MatAssemblyBegin(baij->A,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode); CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselfs, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*) baij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat); CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(baij->B,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode); CHKERRQ(ierr);

#if defined(USE_PETSC_BOPT_g)
  if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
    PLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",
             ((double)baij->ht_total_ct)/baij->ht_insert_ct);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  /* once structure is final, optionally switch to the faster hash-table setters */
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact); CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;}
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIBAIJ_Binary"
/* Binary viewer: only the uniprocessor case is supported (views A directly). */
static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
  } else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
/*
   ASCII/Draw/Socket viewer. Info formats print per-process statistics;
   otherwise the whole matrix is gathered onto processor 0 (as a temporary
   MPIBAIJ matrix) and its local part viewed there.
*/
static int MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr, format,bs = baij->bs, size = baij->size, rank = baij->rank;
  FILE        *fd;
  ViewerType  vtype;

  PetscFunctionBegin;
  ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
  if (PetscTypeCompare(vtype,ASCII_VIEWER)) {
    ierr = ViewerGetFormat(viewer,&format); CHKERRQ(ierr);
    if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
      MatInfo info;
      MPI_Comm_rank(mat->comm,&rank);
      ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      PetscSequentialPhaseBegin(mat->comm,1);
      fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
              rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
              baij->bs,(int)info.memory);
      /* NOTE(review): these two MatGetInfo return codes are not CHKERRQ-checked */
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);
      fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);
      fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
      fflush(fd);
      PetscSequentialPhaseEnd(mat->comm,1);
      ierr = VecScatterView(baij->Mvctx,viewer); CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == VIEWER_FORMAT_ASCII_INFO) {
      PetscPrintf(mat->comm," block size is %d\n",bs);
      PetscFunctionReturn(0);
    }
  }

  if (PetscTypeCompare(vtype,DRAW_VIEWER)) {
    Draw       draw;
    PetscTruth isnull;
    ierr = ViewerDrawGetDraw(viewer,0,&draw); CHKERRQ(ierr);
    ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    int         M = baij->M, N = baij->N,*ai,*aj,col,i,j,k,*rvals;
    int         mbs = baij->mbs;
    Scalar      *a;

    /* processor 0 owns every row of the gathered matrix; others own none */
    if (!rank) {
      ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    }
    PLogObjectParent(mat,A);

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*) baij->A->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);

    for ( i=0; i<mbs; i++ ) {
      rvals[0] = bs*(baij->rstart + i);
      for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
      for ( j=ai[i]; j<ai[i+1]; j++ ) {
        col = (baij->cstart+aj[j])*bs;
        for (k=0; k<bs; k++ ) {
          ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*) baij->B->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for ( i=0; i<mbs; i++ ) {
      rvals[0] = bs*(baij->rstart + i);
      for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
      for ( j=ai[i]; j<ai[i+1]; j++ ) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++ ) {
          ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    PetscFree(rvals);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the Draw object
    */
    if (!rank || PetscTypeCompare(vtype,DRAW_VIEWER)) {
      ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
    }
    ierr = MatDestroy(A); CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}



#undef __FUNC__
#define __FUNC__ "MatView_MPIBAIJ"
/*
   MatView_MPIBAIJ - Top-level viewer dispatch for MPIBAIJ matrices.
   ASCII, draw, and socket viewers share one code path; binary viewers
   get their own; anything else is an error.
*/
int MatView_MPIBAIJ(Mat mat,Viewer viewer)
{
  int        ierr;
  ViewerType vtype;

  PetscFunctionBegin;
  ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
  if (PetscTypeCompare(vtype,ASCII_VIEWER) || PetscTypeCompare(vtype,DRAW_VIEWER) ||
      PetscTypeCompare(vtype,SOCKET_VIEWER)) {
    ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer); CHKERRQ(ierr);
  } else if (PetscTypeCompare(vtype,BINARY_VIEWER)) {
    ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ(1,1,"Viewer type not supported by PETSc object");
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatDestroy_MPIBAIJ"
/*
   MatDestroy_MPIBAIJ - Frees all storage owned by an MPIBAIJ matrix:
   mappings, maps, stashes, the two sequential blocks A (diagonal part)
   and B (off-diagonal part), the column map, and the scatter context.
*/
int MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr;

  PetscFunctionBegin;
  /* reference counted: only the last release actually destroys */
  if (--mat->refct > 0) PetscFunctionReturn(0);

  if (mat->mapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr);
  }
  if (mat->bmapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->bmapping); CHKERRQ(ierr);
  }
  if (mat->rmap) {
    ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
  }
  if (mat->cmap) {
    ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
  }
#if defined(USE_PETSC_LOG)
  PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",baij->M,baij->N);
#endif

  ierr = MatStashDestroy_Private(&mat->stash); CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash); CHKERRQ(ierr);

  PetscFree(baij->rowners);
  ierr = MatDestroy(baij->A); CHKERRQ(ierr);
  ierr = MatDestroy(baij->B); CHKERRQ(ierr);
  /* colmap is a hash table when USE_CTABLE, a dense array otherwise */
#if defined (USE_CTABLE)
  if (baij->colmap) TableDelete(baij->colmap);
#else
  if (baij->colmap) PetscFree(baij->colmap);
#endif
  if (baij->garray) PetscFree(baij->garray);
  if (baij->lvec) VecDestroy(baij->lvec);
  if (baij->Mvctx) VecScatterDestroy(baij->Mvctx);
  if (baij->rowvalues) PetscFree(baij->rowvalues);
if (baij->barray) PetscFree(baij->barray); 1073 if (baij->hd) PetscFree(baij->hd); 1074 PetscFree(baij); 1075 PLogObjectDestroy(mat); 1076 PetscHeaderDestroy(mat); 1077 PetscFunctionReturn(0); 1078 } 1079 1080 #undef __FUNC__ 1081 #define __FUNC__ "MatMult_MPIBAIJ" 1082 int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1083 { 1084 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1085 int ierr, nt; 1086 1087 PetscFunctionBegin; 1088 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1089 if (nt != a->n) { 1090 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx"); 1091 } 1092 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1093 if (nt != a->m) { 1094 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible parition of A and yy"); 1095 } 1096 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1097 ierr = (*a->A->ops->mult)(a->A,xx,yy); CHKERRQ(ierr); 1098 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1099 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 1100 ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1101 PetscFunctionReturn(0); 1102 } 1103 1104 #undef __FUNC__ 1105 #define __FUNC__ "MatMultAdd_MPIBAIJ" 1106 int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1107 { 1108 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1109 int ierr; 1110 1111 PetscFunctionBegin; 1112 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1113 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 1114 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1115 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 1116 PetscFunctionReturn(0); 1117 } 1118 1119 #undef __FUNC__ 1120 #define __FUNC__ "MatMultTrans_MPIBAIJ" 1121 int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy) 1122 { 1123 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1124 int ierr; 1125 1126 PetscFunctionBegin; 1127 /* do nondiagonal part 
*/ 1128 ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 1129 /* send it on its way */ 1130 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1131 /* do local part */ 1132 ierr = (*a->A->ops->multtrans)(a->A,xx,yy); CHKERRQ(ierr); 1133 /* receive remote parts: note this assumes the values are not actually */ 1134 /* inserted in yy until the next line, which is true for my implementation*/ 1135 /* but is not perhaps always true. */ 1136 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1137 PetscFunctionReturn(0); 1138 } 1139 1140 #undef __FUNC__ 1141 #define __FUNC__ "MatMultTransAdd_MPIBAIJ" 1142 int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1143 { 1144 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1145 int ierr; 1146 1147 PetscFunctionBegin; 1148 /* do nondiagonal part */ 1149 ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 1150 /* send it on its way */ 1151 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 1152 /* do local part */ 1153 ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 1154 /* receive remote parts: note this assumes the values are not actually */ 1155 /* inserted in yy until the next line, which is true for my implementation*/ 1156 /* but is not perhaps always true. 
*/ 1157 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 1158 PetscFunctionReturn(0); 1159 } 1160 1161 /* 1162 This only works correctly for square matrices where the subblock A->A is the 1163 diagonal block 1164 */ 1165 #undef __FUNC__ 1166 #define __FUNC__ "MatGetDiagonal_MPIBAIJ" 1167 int MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1168 { 1169 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1170 int ierr; 1171 1172 PetscFunctionBegin; 1173 if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block"); 1174 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1175 PetscFunctionReturn(0); 1176 } 1177 1178 #undef __FUNC__ 1179 #define __FUNC__ "MatScale_MPIBAIJ" 1180 int MatScale_MPIBAIJ(Scalar *aa,Mat A) 1181 { 1182 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1183 int ierr; 1184 1185 PetscFunctionBegin; 1186 ierr = MatScale(aa,a->A); CHKERRQ(ierr); 1187 ierr = MatScale(aa,a->B); CHKERRQ(ierr); 1188 PetscFunctionReturn(0); 1189 } 1190 1191 #undef __FUNC__ 1192 #define __FUNC__ "MatGetSize_MPIBAIJ" 1193 int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n) 1194 { 1195 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1196 1197 PetscFunctionBegin; 1198 if (m) *m = mat->M; 1199 if (n) *n = mat->N; 1200 PetscFunctionReturn(0); 1201 } 1202 1203 #undef __FUNC__ 1204 #define __FUNC__ "MatGetLocalSize_MPIBAIJ" 1205 int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n) 1206 { 1207 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1208 1209 PetscFunctionBegin; 1210 *m = mat->m; *n = mat->n; 1211 PetscFunctionReturn(0); 1212 } 1213 1214 #undef __FUNC__ 1215 #define __FUNC__ "MatGetOwnershipRange_MPIBAIJ" 1216 int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n) 1217 { 1218 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1219 1220 PetscFunctionBegin; 1221 *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs; 1222 PetscFunctionReturn(0); 1223 } 1224 1225 #undef __FUNC__ 1226 #define __FUNC__ "MatGetRow_MPIBAIJ" 1227 int 
MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 1228 { 1229 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1230 Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 1231 int bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB; 1232 int nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs; 1233 int *cmap, *idx_p,cstart = mat->cstart; 1234 1235 PetscFunctionBegin; 1236 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active"); 1237 mat->getrowactive = PETSC_TRUE; 1238 1239 if (!mat->rowvalues && (idx || v)) { 1240 /* 1241 allocate enough space to hold information from the longest row. 1242 */ 1243 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data; 1244 int max = 1,mbs = mat->mbs,tmp; 1245 for ( i=0; i<mbs; i++ ) { 1246 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1247 if (max < tmp) { max = tmp; } 1248 } 1249 mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar))); 1250 CHKPTRQ(mat->rowvalues); 1251 mat->rowindices = (int *) (mat->rowvalues + max*bs2); 1252 } 1253 1254 if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,0,"Only local rows") 1255 lrow = row - brstart; 1256 1257 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1258 if (!v) {pvA = 0; pvB = 0;} 1259 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1260 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1261 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1262 nztot = nzA + nzB; 1263 1264 cmap = mat->garray; 1265 if (v || idx) { 1266 if (nztot) { 1267 /* Sort by increasing column numbers, assuming A and B already sorted */ 1268 int imark = -1; 1269 if (v) { 1270 *v = v_p = mat->rowvalues; 1271 for ( i=0; i<nzB; i++ ) { 1272 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1273 else break; 1274 } 1275 imark = i; 1276 for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i]; 1277 for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = 
vworkB[i]; 1278 } 1279 if (idx) { 1280 *idx = idx_p = mat->rowindices; 1281 if (imark > -1) { 1282 for ( i=0; i<imark; i++ ) { 1283 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1284 } 1285 } else { 1286 for ( i=0; i<nzB; i++ ) { 1287 if (cmap[cworkB[i]/bs] < cstart) 1288 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1289 else break; 1290 } 1291 imark = i; 1292 } 1293 for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart*bs + cworkA[i]; 1294 for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1295 } 1296 } else { 1297 if (idx) *idx = 0; 1298 if (v) *v = 0; 1299 } 1300 } 1301 *nz = nztot; 1302 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1303 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1304 PetscFunctionReturn(0); 1305 } 1306 1307 #undef __FUNC__ 1308 #define __FUNC__ "MatRestoreRow_MPIBAIJ" 1309 int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1310 { 1311 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1312 1313 PetscFunctionBegin; 1314 if (baij->getrowactive == PETSC_FALSE) { 1315 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called"); 1316 } 1317 baij->getrowactive = PETSC_FALSE; 1318 PetscFunctionReturn(0); 1319 } 1320 1321 #undef __FUNC__ 1322 #define __FUNC__ "MatGetBlockSize_MPIBAIJ" 1323 int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs) 1324 { 1325 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1326 1327 PetscFunctionBegin; 1328 *bs = baij->bs; 1329 PetscFunctionReturn(0); 1330 } 1331 1332 #undef __FUNC__ 1333 #define __FUNC__ "MatZeroEntries_MPIBAIJ" 1334 int MatZeroEntries_MPIBAIJ(Mat A) 1335 { 1336 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 1337 int ierr; 1338 1339 PetscFunctionBegin; 1340 ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 1341 ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 1342 PetscFunctionReturn(0); 1343 } 1344 1345 #undef __FUNC__ 1346 #define __FUNC__ "MatGetInfo_MPIBAIJ" 1347 int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType 
flag,MatInfo *info) 1348 { 1349 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) matin->data; 1350 Mat A = a->A, B = a->B; 1351 int ierr; 1352 double isend[5], irecv[5]; 1353 1354 PetscFunctionBegin; 1355 info->block_size = (double)a->bs; 1356 ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr); 1357 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1358 isend[3] = info->memory; isend[4] = info->mallocs; 1359 ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr); 1360 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1361 isend[3] += info->memory; isend[4] += info->mallocs; 1362 if (flag == MAT_LOCAL) { 1363 info->nz_used = isend[0]; 1364 info->nz_allocated = isend[1]; 1365 info->nz_unneeded = isend[2]; 1366 info->memory = isend[3]; 1367 info->mallocs = isend[4]; 1368 } else if (flag == MAT_GLOBAL_MAX) { 1369 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr); 1370 info->nz_used = irecv[0]; 1371 info->nz_allocated = irecv[1]; 1372 info->nz_unneeded = irecv[2]; 1373 info->memory = irecv[3]; 1374 info->mallocs = irecv[4]; 1375 } else if (flag == MAT_GLOBAL_SUM) { 1376 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr); 1377 info->nz_used = irecv[0]; 1378 info->nz_allocated = irecv[1]; 1379 info->nz_unneeded = irecv[2]; 1380 info->memory = irecv[3]; 1381 info->mallocs = irecv[4]; 1382 } else { 1383 SETERRQ1(1,1,"Unknown MatInfoType argument %d",flag); 1384 } 1385 info->rows_global = (double)a->M; 1386 info->columns_global = (double)a->N; 1387 info->rows_local = (double)a->m; 1388 info->columns_local = (double)a->N; 1389 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1390 info->fill_ratio_needed = 0; 1391 info->factor_mallocs = 0; 1392 PetscFunctionReturn(0); 1393 } 1394 1395 #undef __FUNC__ 1396 #define __FUNC__ "MatSetOption_MPIBAIJ" 1397 int MatSetOption_MPIBAIJ(Mat A,MatOption op) 1398 { 1399 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 
1400 int ierr; 1401 1402 PetscFunctionBegin; 1403 if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 1404 op == MAT_YES_NEW_NONZERO_LOCATIONS || 1405 op == MAT_COLUMNS_UNSORTED || 1406 op == MAT_COLUMNS_SORTED || 1407 op == MAT_NEW_NONZERO_ALLOCATION_ERR || 1408 op == MAT_NEW_NONZERO_LOCATION_ERR) { 1409 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1410 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1411 } else if (op == MAT_ROW_ORIENTED) { 1412 a->roworiented = 1; 1413 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1414 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1415 } else if (op == MAT_ROWS_SORTED || 1416 op == MAT_ROWS_UNSORTED || 1417 op == MAT_SYMMETRIC || 1418 op == MAT_STRUCTURALLY_SYMMETRIC || 1419 op == MAT_YES_NEW_DIAGONALS || 1420 op == MAT_USE_HASH_TABLE) { 1421 PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 1422 } else if (op == MAT_COLUMN_ORIENTED) { 1423 a->roworiented = 0; 1424 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1425 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1426 } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 1427 a->donotstash = 1; 1428 } else if (op == MAT_NO_NEW_DIAGONALS) { 1429 SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 1430 } else if (op == MAT_USE_HASH_TABLE) { 1431 a->ht_flag = 1; 1432 } else { 1433 SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 1434 } 1435 PetscFunctionReturn(0); 1436 } 1437 1438 #undef __FUNC__ 1439 #define __FUNC__ "MatTranspose_MPIBAIJ(" 1440 int MatTranspose_MPIBAIJ(Mat A,Mat *matout) 1441 { 1442 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data; 1443 Mat_SeqBAIJ *Aloc; 1444 Mat B; 1445 int ierr,M=baij->M,N=baij->N,*ai,*aj,i,*rvals,j,k,col; 1446 int bs=baij->bs,mbs=baij->mbs; 1447 Scalar *a; 1448 1449 PetscFunctionBegin; 1450 if (matout == PETSC_NULL && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place"); 1451 ierr = MatCreateMPIBAIJ(A->comm,baij->bs,baij->n,baij->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B); 1452 CHKERRQ(ierr); 1453 1454 /* copy over the A part */ 1455 Aloc = (Mat_SeqBAIJ*) 
baij->A->data; 1456 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1457 rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals); 1458 1459 for ( i=0; i<mbs; i++ ) { 1460 rvals[0] = bs*(baij->rstart + i); 1461 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 1462 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1463 col = (baij->cstart+aj[j])*bs; 1464 for (k=0; k<bs; k++ ) { 1465 ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1466 col++; a += bs; 1467 } 1468 } 1469 } 1470 /* copy over the B part */ 1471 Aloc = (Mat_SeqBAIJ*) baij->B->data; 1472 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1473 for ( i=0; i<mbs; i++ ) { 1474 rvals[0] = bs*(baij->rstart + i); 1475 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 1476 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1477 col = baij->garray[aj[j]]*bs; 1478 for (k=0; k<bs; k++ ) { 1479 ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1480 col++; a += bs; 1481 } 1482 } 1483 } 1484 PetscFree(rvals); 1485 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1486 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1487 1488 if (matout != PETSC_NULL) { 1489 *matout = B; 1490 } else { 1491 PetscOps *Abops; 1492 MatOps Aops; 1493 1494 /* This isn't really an in-place transpose .... but free data structures from baij */ 1495 PetscFree(baij->rowners); 1496 ierr = MatDestroy(baij->A); CHKERRQ(ierr); 1497 ierr = MatDestroy(baij->B); CHKERRQ(ierr); 1498 #if defined (USE_CTABLE) 1499 if (baij->colmap) TableDelete(baij->colmap); 1500 #else 1501 if (baij->colmap) PetscFree(baij->colmap); 1502 #endif 1503 if (baij->garray) PetscFree(baij->garray); 1504 if (baij->lvec) VecDestroy(baij->lvec); 1505 if (baij->Mvctx) VecScatterDestroy(baij->Mvctx); 1506 PetscFree(baij); 1507 1508 /* 1509 This is horrible, horrible code. We need to keep the 1510 A pointers for the bops and ops but copy everything 1511 else from C. 
1512 */ 1513 Abops = A->bops; 1514 Aops = A->ops; 1515 PetscMemcpy(A,B,sizeof(struct _p_Mat)); 1516 A->bops = Abops; 1517 A->ops = Aops; 1518 1519 PetscHeaderDestroy(B); 1520 } 1521 PetscFunctionReturn(0); 1522 } 1523 1524 #undef __FUNC__ 1525 #define __FUNC__ "MatDiagonalScale_MPIBAIJ" 1526 int MatDiagonalScale_MPIBAIJ(Mat A,Vec ll,Vec rr) 1527 { 1528 Mat a = ((Mat_MPIBAIJ *) A->data)->A; 1529 Mat b = ((Mat_MPIBAIJ *) A->data)->B; 1530 int ierr,s1,s2,s3; 1531 1532 PetscFunctionBegin; 1533 if (ll) { 1534 ierr = VecGetLocalSize(ll,&s1); CHKERRQ(ierr); 1535 ierr = MatGetLocalSize(A,&s2,&s3); CHKERRQ(ierr); 1536 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"non-conforming local sizes"); 1537 ierr = MatDiagonalScale(a,ll,0); CHKERRQ(ierr); 1538 ierr = MatDiagonalScale(b,ll,0); CHKERRQ(ierr); 1539 } 1540 if (rr) SETERRQ(PETSC_ERR_SUP,0,"not supported for right vector"); 1541 PetscFunctionReturn(0); 1542 } 1543 1544 #undef __FUNC__ 1545 #define __FUNC__ "MatZeroRows_MPIBAIJ" 1546 int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag) 1547 { 1548 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 1549 int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 1550 int *procs,*nprocs,j,found,idx,nsends,*work,row; 1551 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 1552 int *rvalues,tag = A->tag,count,base,slen,n,*source; 1553 int *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs; 1554 MPI_Comm comm = A->comm; 1555 MPI_Request *send_waits,*recv_waits; 1556 MPI_Status recv_status,*send_status; 1557 IS istmp; 1558 1559 PetscFunctionBegin; 1560 ierr = ISGetSize(is,&N); CHKERRQ(ierr); 1561 ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 1562 1563 /* first count number of contributors to each processor */ 1564 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 1565 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 1566 owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 1567 for ( i=0; i<N; i++ ) { 1568 idx = rows[i]; 1569 
    found = 0;
    /* owners[] holds block-row ranges, so scale by bs to compare point rows */
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work   = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  /* SUM of the 0/1 flags tells each rank how many messages it will receive */
  ierr   = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
  nrecvs = work[rank];
  /* MAX of the counts bounds the size of any single incoming message */
  ierr   = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
  nmax   = work[rank];
  PetscFree(work);

  /* post receives:  */
  rvalues    = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to 
         the ith processor
  */
  svalues    = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
  starts     = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
  starts[0]  = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  /* pack the row indices destined for each processor contiguously
     (this pass advances starts[], which is rebuilt below) */
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ISRestoreIndices(is,&rows);

  starts[0] = 0;
  for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  PetscFree(starts);

  /* first point row owned by this processor */
  base = owners[rank]*bs;
  /* wait on receives */
  lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen          += n;
    count--;
  }
  PetscFree(recv_waits);

  /* move the data into the send scatter */
  lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      /* convert each received global row to a local row index */
      lrows[count++] = values[j] - base;
    }
  }
  PetscFree(rvalues); PetscFree(lens);
  PetscFree(owner);   PetscFree(nprocs);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PLogObjectParent(A,istmp);

  /*
        Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

        Contributed by: Mathew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
  ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
  if (diag && (l->A->M == l->A->N)) {
    ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
  } else if (diag) {
    /* rectangular diagonal block: zero first, then insert the diagonal
       entries one at a time (requires new-nonzero insertion to be allowed) */
    ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr);
    if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,0,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for ( i = 0; i < slen; i++ ) {
      row  = lrows[i] + rstart_bs;
      ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr);
  }

  ierr = ISDestroy(istmp); CHKERRQ(ierr);
  PetscFree(lrows);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    PetscFree(send_status);
  }
  PetscFree(send_waits); PetscFree(svalues);

  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatPrintHelp_MPIBAIJ"
/*
   MatPrintHelp_MPIBAIJ - prints the runtime options for this format (once);
   rank 0 also prints the SeqBAIJ options of the diagonal block.
*/
int MatPrintHelp_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data;
  MPI_Comm    comm = A->comm;
  static int  called = 0;
  int         ierr;

  PetscFunctionBegin;
  if (!a->rank) {
    ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr);
  }
  if (called) {PetscFunctionReturn(0);} else called = 1;
  (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");
  (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatSetUnfactored_MPIBAIJ"
/* MatSetUnfactored_MPIBAIJ - clears the factored state of the diagonal block */
int MatSetUnfactored_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static int MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);

/* -------------------------------------------------------------------*/
/* Operation dispatch table for the MPIBAIJ format; entries are POSITIONAL,
   a 0 means the operation is not supported by this format */
static struct _MatOps MatOps_Values = {
  MatSetValues_MPIBAIJ,
  MatGetRow_MPIBAIJ,
  MatRestoreRow_MPIBAIJ,
  MatMult_MPIBAIJ,
  MatMultAdd_MPIBAIJ,
  MatMultTrans_MPIBAIJ,
  MatMultTransAdd_MPIBAIJ,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  MatTranspose_MPIBAIJ,
  MatGetInfo_MPIBAIJ,
  0,
  MatGetDiagonal_MPIBAIJ,
  MatDiagonalScale_MPIBAIJ,
  MatNorm_MPIBAIJ,
  MatAssemblyBegin_MPIBAIJ,
  MatAssemblyEnd_MPIBAIJ,
  0,
  MatSetOption_MPIBAIJ,
  MatZeroEntries_MPIBAIJ,
  MatZeroRows_MPIBAIJ,
  0,
  0,
  0,
  0,
  MatGetSize_MPIBAIJ,
  MatGetLocalSize_MPIBAIJ,
  MatGetOwnershipRange_MPIBAIJ,
  0,
  0,
  0,
  0,
  MatDuplicate_MPIBAIJ,
  0,
  0,
  0,
  0,
  0,
  MatGetSubMatrices_MPIBAIJ,
  MatIncreaseOverlap_MPIBAIJ,
  MatGetValues_MPIBAIJ,
  0,
  MatPrintHelp_MPIBAIJ,
  MatScale_MPIBAIJ,
  0,
  0,
  0,
  MatGetBlockSize_MPIBAIJ,
  0,
  0,
  0,
  0,
  0,
  0,
  MatSetUnfactored_MPIBAIJ,
  0,
  MatSetValuesBlocked_MPIBAIJ,
  0,
  0,
  0,
  MatGetMaps_Petsc};


EXTERN_C_BEGIN
#undef __FUNC__
#define __FUNC__ "MatGetDiagonalBlock_MPIBAIJ"
/* MatGetDiagonalBlock_MPIBAIJ - hands back the sequential diagonal block
   directly (no copy is made, hence *iscopy = PETSC_FALSE) */
int MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
{
  PetscFunctionBegin;
  *a      = ((Mat_MPIBAIJ *)A->data)->A;
  *iscopy = PETSC_FALSE;
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNC__
#define __FUNC__ "MatCreateMPIBAIJ"
/*@C
   MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
   (block compressed row).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters 
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs   - size of block
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
           This value should be the same as the local size used in creating the 
           y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
           This value should be the same as the local size used in creating the 
           x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz  - number of block nonzeros per block row in diagonal portion of local 
           submatrix  (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows 
           of the in diagonal portion of the local (possibly different for each block
           row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
.  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
           submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or PETSC_NULL.

   Output Parameter:
.  A - the matrix 

   Options Database Keys:
.
-mat_no_unroll - uses code that does not unroll the loops in the 1846 block calculations (much slower) 1847 . -mat_block_size - size of the blocks to use 1848 . -mat_mpi - use the parallel matrix data structures even on one processor 1849 (defaults to using SeqBAIJ format on one processor) 1850 1851 Notes: 1852 The user MUST specify either the local or global matrix dimensions 1853 (possibly both). 1854 1855 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1856 than it must be used on all processors that share the object for that argument. 1857 1858 Storage Information: 1859 For a square global matrix we define each processor's diagonal portion 1860 to be its local rows and the corresponding columns (a square submatrix); 1861 each processor's off-diagonal portion encompasses the remainder of the 1862 local matrix (a rectangular submatrix). 1863 1864 The user can specify preallocated storage for the diagonal part of 1865 the local submatrix with either d_nz or d_nnz (not both). Set 1866 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1867 memory allocation. Likewise, specify preallocated storage for the 1868 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1869 1870 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1871 the figure below we depict these three local rows and all columns (0-11). 1872 1873 .vb 1874 0 1 2 3 4 5 6 7 8 9 10 11 1875 ------------------- 1876 row 3 | o o o d d d o o o o o o 1877 row 4 | o o o d d d o o o o o o 1878 row 5 | o o o d d d o o o o o o 1879 ------------------- 1880 .ve 1881 1882 Thus, any entries in the d locations are stored in the d (diagonal) 1883 submatrix, and any entries in the o locations are stored in the 1884 o (off-diagonal) submatrix. Note that the d and the o submatrices are 1885 stored simply in the MATSEQBAIJ format for compressed row storage. 
1886 1887 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 1888 and o_nz should indicate the number of block nonzeros per row in the o matrix. 1889 In general, for PDE problems in which most nonzeros are near the diagonal, 1890 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1891 or you will get TERRIBLE performance; see the users' manual chapter on 1892 matrices. 1893 1894 Level: intermediate 1895 1896 .keywords: matrix, block, aij, compressed row, sparse, parallel 1897 1898 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIAIJ() 1899 @*/ 1900 int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N, 1901 int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 1902 { 1903 Mat B; 1904 Mat_MPIBAIJ *b; 1905 int ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size,flg; 1906 int flag1 = 0,flag2 = 0; 1907 1908 PetscFunctionBegin; 1909 if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid block size specified, must be positive"); 1910 1911 MPI_Comm_size(comm,&size); 1912 ierr = OptionsHasName(PETSC_NULL,"-mat_mpibaij",&flag1); CHKERRQ(ierr); 1913 ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2); CHKERRQ(ierr); 1914 if (!flag1 && !flag2 && size == 1) { 1915 if (M == PETSC_DECIDE) M = m; 1916 if (N == PETSC_DECIDE) N = n; 1917 ierr = MatCreateSeqBAIJ(comm,bs,M,N,d_nz,d_nnz,A); CHKERRQ(ierr); 1918 PetscFunctionReturn(0); 1919 } 1920 1921 *A = 0; 1922 PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",comm,MatDestroy,MatView); 1923 PLogObjectCreate(B); 1924 B->data = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b); 1925 PetscMemzero(b,sizeof(Mat_MPIBAIJ)); 1926 PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps)); 1927 1928 B->ops->destroy = MatDestroy_MPIBAIJ; 1929 B->ops->view = MatView_MPIBAIJ; 1930 B->mapping = 0; 1931 B->factor = 0; 1932 B->assembled = PETSC_FALSE; 1933 1934 B->insertmode = NOT_SET_VALUES; 1935 MPI_Comm_rank(comm,&b->rank); 1936 
MPI_Comm_size(comm,&b->size); 1937 1938 if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) { 1939 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 1940 } 1941 if ( M == PETSC_DECIDE && m == PETSC_DECIDE) { 1942 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either M or m should be specified"); 1943 } 1944 if ( N == PETSC_DECIDE && n == PETSC_DECIDE) { 1945 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either N or n should be specified"); 1946 } 1947 if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE; 1948 if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE; 1949 1950 if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 1951 work[0] = m; work[1] = n; 1952 mbs = m/bs; nbs = n/bs; 1953 ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr); 1954 if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;} 1955 if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;} 1956 } 1957 if (m == PETSC_DECIDE) { 1958 Mbs = M/bs; 1959 if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global rows must be divisible by blocksize"); 1960 mbs = Mbs/b->size + ((Mbs % b->size) > b->rank); 1961 m = mbs*bs; 1962 } 1963 if (n == PETSC_DECIDE) { 1964 Nbs = N/bs; 1965 if (Nbs*bs != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global cols must be divisible by blocksize"); 1966 nbs = Nbs/b->size + ((Nbs % b->size) > b->rank); 1967 n = nbs*bs; 1968 } 1969 if (mbs*bs != m || nbs*bs != n) { 1970 SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of local rows, cols must be divisible by blocksize"); 1971 } 1972 1973 b->m = m; B->m = m; 1974 b->n = n; B->n = n; 1975 b->N = N; B->N = N; 1976 b->M = M; B->M = M; 1977 b->bs = bs; 1978 b->bs2 = bs*bs; 1979 b->mbs = mbs; 1980 b->nbs = nbs; 1981 b->Mbs = Mbs; 1982 b->Nbs = Nbs; 1983 1984 /* the information in the maps duplicates the information computed below, eventually 1985 we should remove the duplicate information that is not contained in the maps */ 1986 ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr); 1987 ierr = 
MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr); 1988 1989 /* build local table of row and column ownerships */ 1990 b->rowners = (int *) PetscMalloc(3*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners); 1991 PLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 1992 b->cowners = b->rowners + b->size + 2; 1993 b->rowners_bs = b->cowners + b->size + 2; 1994 ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1995 b->rowners[0] = 0; 1996 for ( i=2; i<=b->size; i++ ) { 1997 b->rowners[i] += b->rowners[i-1]; 1998 } 1999 for ( i=0; i<=b->size; i++ ) { 2000 b->rowners_bs[i] = b->rowners[i]*bs; 2001 } 2002 b->rstart = b->rowners[b->rank]; 2003 b->rend = b->rowners[b->rank+1]; 2004 b->rstart_bs = b->rstart * bs; 2005 b->rend_bs = b->rend * bs; 2006 2007 ierr = MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2008 b->cowners[0] = 0; 2009 for ( i=2; i<=b->size; i++ ) { 2010 b->cowners[i] += b->cowners[i-1]; 2011 } 2012 b->cstart = b->cowners[b->rank]; 2013 b->cend = b->cowners[b->rank+1]; 2014 b->cstart_bs = b->cstart * bs; 2015 b->cend_bs = b->cend * bs; 2016 2017 2018 if (d_nz == PETSC_DEFAULT) d_nz = 5; 2019 ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr); 2020 PLogObjectParent(B,b->A); 2021 if (o_nz == PETSC_DEFAULT) o_nz = 0; 2022 ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr); 2023 PLogObjectParent(B,b->B); 2024 2025 /* build cache for off array entries formed */ 2026 ierr = MatStashCreate_Private(comm,1,&B->stash); CHKERRQ(ierr); 2027 ierr = MatStashCreate_Private(comm,bs,&B->bstash); CHKERRQ(ierr); 2028 b->donotstash = 0; 2029 b->colmap = 0; 2030 b->garray = 0; 2031 b->roworiented = 1; 2032 2033 /* stuff used in block assembly */ 2034 b->barray = 0; 2035 2036 /* stuff used for matrix vector multiply */ 2037 b->lvec = 0; 2038 b->Mvctx = 0; 2039 2040 /* stuff for MatGetRow() */ 2041 b->rowindices = 0; 2042 b->rowvalues = 0; 
2043 b->getrowactive = PETSC_FALSE; 2044 2045 /* hash table stuff */ 2046 b->ht = 0; 2047 b->hd = 0; 2048 b->ht_size = 0; 2049 b->ht_flag = 0; 2050 b->ht_fact = 0; 2051 b->ht_total_ct = 0; 2052 b->ht_insert_ct = 0; 2053 2054 *A = B; 2055 ierr = OptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg); CHKERRQ(ierr); 2056 if (flg) { 2057 double fact = 1.39; 2058 ierr = MatSetOption(B,MAT_USE_HASH_TABLE); CHKERRQ(ierr); 2059 ierr = OptionsGetDouble(PETSC_NULL,"-mat_use_hash_table",&fact,&flg); CHKERRQ(ierr); 2060 if (fact <= 1.0) fact = 1.39; 2061 ierr = MatMPIBAIJSetHashTableFactor(B,fact); CHKERRQ(ierr); 2062 PLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact); 2063 } 2064 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C", 2065 "MatGetDiagonalBlock_MPIBAIJ", 2066 (void*)MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 2067 PetscFunctionReturn(0); 2068 } 2069 2070 #undef __FUNC__ 2071 #define __FUNC__ "MatDuplicate_MPIBAIJ" 2072 static int MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2073 { 2074 Mat mat; 2075 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data; 2076 int ierr, len=0, flg; 2077 2078 PetscFunctionBegin; 2079 *newmat = 0; 2080 PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",matin->comm,MatDestroy,MatView); 2081 PLogObjectCreate(mat); 2082 mat->data = (void *) (a = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(a); 2083 PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps)); 2084 mat->ops->destroy = MatDestroy_MPIBAIJ; 2085 mat->ops->view = MatView_MPIBAIJ; 2086 mat->factor = matin->factor; 2087 mat->assembled = PETSC_TRUE; 2088 mat->insertmode = NOT_SET_VALUES; 2089 2090 a->m = mat->m = oldmat->m; 2091 a->n = mat->n = oldmat->n; 2092 a->M = mat->M = oldmat->M; 2093 a->N = mat->N = oldmat->N; 2094 2095 a->bs = oldmat->bs; 2096 a->bs2 = oldmat->bs2; 2097 a->mbs = oldmat->mbs; 2098 a->nbs = oldmat->nbs; 2099 a->Mbs = oldmat->Mbs; 2100 a->Nbs = oldmat->Nbs; 2101 2102 a->rstart = 
oldmat->rstart; 2103 a->rend = oldmat->rend; 2104 a->cstart = oldmat->cstart; 2105 a->cend = oldmat->cend; 2106 a->size = oldmat->size; 2107 a->rank = oldmat->rank; 2108 a->donotstash = oldmat->donotstash; 2109 a->roworiented = oldmat->roworiented; 2110 a->rowindices = 0; 2111 a->rowvalues = 0; 2112 a->getrowactive = PETSC_FALSE; 2113 a->barray = 0; 2114 a->rstart_bs = oldmat->rstart_bs; 2115 a->rend_bs = oldmat->rend_bs; 2116 a->cstart_bs = oldmat->cstart_bs; 2117 a->cend_bs = oldmat->cend_bs; 2118 2119 /* hash table stuff */ 2120 a->ht = 0; 2121 a->hd = 0; 2122 a->ht_size = 0; 2123 a->ht_flag = oldmat->ht_flag; 2124 a->ht_fact = oldmat->ht_fact; 2125 a->ht_total_ct = 0; 2126 a->ht_insert_ct = 0; 2127 2128 2129 a->rowners = (int *) PetscMalloc(3*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 2130 PLogObjectMemory(mat,3*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 2131 a->cowners = a->rowners + a->size + 2; 2132 a->rowners_bs = a->cowners + a->size + 2; 2133 PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int)); 2134 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash); CHKERRQ(ierr); 2135 ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash); CHKERRQ(ierr); 2136 if (oldmat->colmap) { 2137 #if defined (USE_CTABLE) 2138 ierr = TableCreateCopy(oldmat->colmap,&a->colmap); CHKERRQ(ierr); 2139 #else 2140 a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap); 2141 PLogObjectMemory(mat,(a->Nbs)*sizeof(int)); 2142 PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int)); 2143 #endif 2144 } else a->colmap = 0; 2145 if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) { 2146 a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray); 2147 PLogObjectMemory(mat,len*sizeof(int)); 2148 PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); 2149 } else a->garray = 0; 2150 2151 ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 2152 PLogObjectParent(mat,a->lvec); 2153 ierr 
= VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 2154 2155 PLogObjectParent(mat,a->Mvctx); 2156 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A); CHKERRQ(ierr); 2157 PLogObjectParent(mat,a->A); 2158 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B); CHKERRQ(ierr); 2159 PLogObjectParent(mat,a->B); 2160 ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr); 2161 ierr = FListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr); 2162 if (flg) { 2163 ierr = MatPrintHelp(mat); CHKERRQ(ierr); 2164 } 2165 *newmat = mat; 2166 PetscFunctionReturn(0); 2167 } 2168 2169 #include "sys.h" 2170 2171 #undef __FUNC__ 2172 #define __FUNC__ "MatLoad_MPIBAIJ" 2173 int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat) 2174 { 2175 Mat A; 2176 int i, nz, ierr, j,rstart, rend, fd; 2177 Scalar *vals,*buf; 2178 MPI_Comm comm = ((PetscObject)viewer)->comm; 2179 MPI_Status status; 2180 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 2181 int *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf; 2182 int flg,tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows; 2183 int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2184 int dcount,kmax,k,nzcount,tmp; 2185 2186 PetscFunctionBegin; 2187 ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr); 2188 2189 MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 2190 if (!rank) { 2191 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 2192 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr); 2193 if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object"); 2194 if (header[3] < 0) { 2195 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as MPIBAIJ"); 2196 } 2197 } 2198 2199 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 2200 M = header[1]; N = header[2]; 2201 2202 if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices"); 2203 2204 /* 2205 This code adds extra rows 
to make sure the number of rows is 2206 divisible by the blocksize 2207 */ 2208 Mbs = M/bs; 2209 extra_rows = bs - M + bs*(Mbs); 2210 if (extra_rows == bs) extra_rows = 0; 2211 else Mbs++; 2212 if (extra_rows &&!rank) { 2213 PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n"); 2214 } 2215 2216 /* determine ownership of all rows */ 2217 mbs = Mbs/size + ((Mbs % size) > rank); 2218 m = mbs * bs; 2219 rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int)); CHKPTRQ(rowners); 2220 browners = rowners + size + 1; 2221 ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2222 rowners[0] = 0; 2223 for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1]; 2224 for ( i=0; i<=size; i++ ) browners[i] = rowners[i]*bs; 2225 rstart = rowners[rank]; 2226 rend = rowners[rank+1]; 2227 2228 /* distribute row lengths to all processors */ 2229 locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) ); CHKPTRQ(locrowlens); 2230 if (!rank) { 2231 rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) ); CHKPTRQ(rowlengths); 2232 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr); 2233 for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 2234 sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 2235 for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i]; 2236 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2237 PetscFree(sndcounts); 2238 } else { 2239 ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm);CHKERRQ(ierr); 2240 } 2241 2242 if (!rank) { 2243 /* calculate the number of nonzeros on each processor */ 2244 procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 2245 PetscMemzero(procsnz,size*sizeof(int)); 2246 for ( i=0; i<size; i++ ) { 2247 for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) { 2248 procsnz[i] += rowlengths[j]; 2249 } 2250 } 2251 PetscFree(rowlengths); 2252 2253 /* 
determine max buffer needed and allocate it */ 2254 maxnz = 0; 2255 for ( i=0; i<size; i++ ) { 2256 maxnz = PetscMax(maxnz,procsnz[i]); 2257 } 2258 cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 2259 2260 /* read in my part of the matrix column indices */ 2261 nz = procsnz[0]; 2262 ibuf = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf); 2263 mycols = ibuf; 2264 if (size == 1) nz -= extra_rows; 2265 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr); 2266 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2267 2268 /* read in every ones (except the last) and ship off */ 2269 for ( i=1; i<size-1; i++ ) { 2270 nz = procsnz[i]; 2271 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr); 2272 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 2273 } 2274 /* read in the stuff for the last proc */ 2275 if ( size != 1 ) { 2276 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2277 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr); 2278 for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i; 2279 ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr); 2280 } 2281 PetscFree(cols); 2282 } else { 2283 /* determine buffer space needed for message */ 2284 nz = 0; 2285 for ( i=0; i<m; i++ ) { 2286 nz += locrowlens[i]; 2287 } 2288 ibuf = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf); 2289 mycols = ibuf; 2290 /* receive message of column indices*/ 2291 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2292 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2293 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 2294 } 2295 2296 /* loop over local rows, determining number of off diagonal entries */ 2297 dlens = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) ); CHKPTRQ(dlens); 2298 odlens = dlens + (rend-rstart); 2299 mask = (int *) PetscMalloc( 3*Mbs*sizeof(int) ); CHKPTRQ(mask); 2300 
PetscMemzero(mask,3*Mbs*sizeof(int)); 2301 masked1 = mask + Mbs; 2302 masked2 = masked1 + Mbs; 2303 rowcount = 0; nzcount = 0; 2304 for ( i=0; i<mbs; i++ ) { 2305 dcount = 0; 2306 odcount = 0; 2307 for ( j=0; j<bs; j++ ) { 2308 kmax = locrowlens[rowcount]; 2309 for ( k=0; k<kmax; k++ ) { 2310 tmp = mycols[nzcount++]/bs; 2311 if (!mask[tmp]) { 2312 mask[tmp] = 1; 2313 if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp; 2314 else masked1[dcount++] = tmp; 2315 } 2316 } 2317 rowcount++; 2318 } 2319 2320 dlens[i] = dcount; 2321 odlens[i] = odcount; 2322 2323 /* zero out the mask elements we set */ 2324 for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0; 2325 for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0; 2326 } 2327 2328 /* create our matrix */ 2329 ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat); 2330 CHKERRQ(ierr); 2331 A = *newmat; 2332 MatSetOption(A,MAT_COLUMNS_SORTED); 2333 2334 if (!rank) { 2335 buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(buf); 2336 /* read in my part of the matrix numerical values */ 2337 nz = procsnz[0]; 2338 vals = buf; 2339 mycols = ibuf; 2340 if (size == 1) nz -= extra_rows; 2341 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 2342 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2343 2344 /* insert into matrix */ 2345 jj = rstart*bs; 2346 for ( i=0; i<m; i++ ) { 2347 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2348 mycols += locrowlens[i]; 2349 vals += locrowlens[i]; 2350 jj++; 2351 } 2352 /* read in other processors (except the last one) and ship out */ 2353 for ( i=1; i<size-1; i++ ) { 2354 nz = procsnz[i]; 2355 vals = buf; 2356 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 2357 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2358 } 2359 /* the last proc */ 2360 if ( size != 1 ){ 2361 nz = procsnz[i] - extra_rows; 2362 vals = buf; 2363 ierr = 
PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 2364 for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0; 2365 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 2366 } 2367 PetscFree(procsnz); 2368 } else { 2369 /* receive numeric values */ 2370 buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(buf); 2371 2372 /* receive message of values*/ 2373 vals = buf; 2374 mycols = ibuf; 2375 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2376 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2377 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 2378 2379 /* insert into matrix */ 2380 jj = rstart*bs; 2381 for ( i=0; i<m; i++ ) { 2382 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2383 mycols += locrowlens[i]; 2384 vals += locrowlens[i]; 2385 jj++; 2386 } 2387 } 2388 PetscFree(locrowlens); 2389 PetscFree(buf); 2390 PetscFree(ibuf); 2391 PetscFree(rowners); 2392 PetscFree(dlens); 2393 PetscFree(mask); 2394 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 2395 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 2396 PetscFunctionReturn(0); 2397 } 2398 2399 2400 2401 #undef __FUNC__ 2402 #define __FUNC__ "MatMPIBAIJSetHashTableFactor" 2403 /*@ 2404 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2405 2406 Input Parameters: 2407 . mat - the matrix 2408 . 
fact - factor 2409 2410 Collective on Mat 2411 2412 Level: advanced 2413 2414 Notes: 2415 This can also be set by the command line option: -mat_use_hash_table fact 2416 2417 .keywords: matrix, hashtable, factor, HT 2418 2419 .seealso: MatSetOption() 2420 @*/ 2421 int MatMPIBAIJSetHashTableFactor(Mat mat,double fact) 2422 { 2423 Mat_MPIBAIJ *baij; 2424 2425 PetscFunctionBegin; 2426 PetscValidHeaderSpecific(mat,MAT_COOKIE); 2427 if (mat->type != MATMPIBAIJ) { 2428 SETERRQ(PETSC_ERR_ARG_WRONG,1,"Incorrect matrix type. Use MPIBAIJ only."); 2429 } 2430 baij = (Mat_MPIBAIJ*) mat->data; 2431 baij->ht_fact = fact; 2432 PetscFunctionReturn(0); 2433 } 2434