1 #ifdef PETSC_RCS_HEADER 2 static char vcid[] = "$Id: mpibaij.c,v 1.162 1999/03/16 16:41:19 bsmith Exp balay $"; 3 #endif 4 5 #include "src/mat/impls/baij/mpi/mpibaij.h" /*I "mat.h" I*/ 6 #include "src/vec/vecimpl.h" 7 8 extern int MatSetUpMultiply_MPIBAIJ(Mat); 9 extern int DisAssemble_MPIBAIJ(Mat); 10 extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int); 11 extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **); 12 extern int MatGetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *); 13 extern int MatSetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *,InsertMode); 14 extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 15 extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 16 extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 17 extern int MatPrintHelp_SeqBAIJ(Mat); 18 19 /* 20 Local utility routine that creates a mapping from the global column 21 number to the local number in the off-diagonal part of the local 22 storage of the matrix. This is done in a non scable way since the 23 length of colmap equals the global matrix length. 
*/
#undef __FUNC__
#define __FUNC__ "CreateColmap_MPIBAIJ_Private"
/*
   Builds baij->colmap: for each global block column present in the
   off-diagonal part B, stores 1 + (local scalar column offset); a
   value of 0 means "block column not present locally".  The +1 shift
   lets 0 double as the "missing" marker.
*/
static int CreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data;
  int         nbs = B->nbs,i,bs=B->bs;
#if defined (USE_CTABLE)
  int         ierr;
#endif

  PetscFunctionBegin;
#if defined (USE_CTABLE)
  /* hash-table variant: nbs/5 is only an initial size hint */
  ierr = TableCreate(baij->nbs/5,&baij->colmap); CHKERRQ(ierr);
  for ( i=0; i<nbs; i++ ){
    ierr = TableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
  }
#else
  /* dense variant: one slot per GLOBAL block column (not scalable) */
  baij->colmap = (int *) PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap);
  PLogObjectMemory(mat,baij->Nbs*sizeof(int));
  PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));
  for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}

/* number of extra block entries added to a row on each reallocation */
#define CHUNKSIZE 10

/*
   MatSetValues_SeqBAIJ_A_Private - inline insertion of one scalar value
   into the diagonal block A.  Binary-then-linear search locates the
   block column; if absent and insertion is allowed, later entries are
   shifted and, when the row is full, the row storage is enlarged by
   CHUNKSIZE blocks.  Relies on locals declared by the caller
   (rp,ap,rmax,nrow,low,high,t,_i,ii,N,brow,bcol,ridx,cidx,bap,bs,bs2).
*/
#define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for ( _i=low; _i<high; _i++ ) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        /* block already stored: update the (ridx,cidx) scalar in place */ \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else                    *bap  = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
      Scalar *new_a; \
 \
      if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
 \
      /* malloc new storage space */ \
      len = 
new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); \ 87 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 88 new_j = (int *) (new_a + bs2*new_nz); \ 89 new_i = new_j + new_nz; \ 90 \ 91 /* copy over old data into new slots */ \ 92 for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} \ 93 for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 94 PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); \ 95 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \ 96 PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, \ 97 len*sizeof(int)); \ 98 PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); \ 99 PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \ 100 PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \ 101 aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar)); \ 102 /* free up old matrix storage */ \ 103 PetscFree(a->a); \ 104 if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \ 105 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 106 a->singlemalloc = 1; \ 107 \ 108 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 109 rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \ 110 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \ 111 a->maxnz += bs2*CHUNKSIZE; \ 112 a->reallocs++; \ 113 a->nz++; \ 114 } \ 115 N = nrow++ - 1; \ 116 /* shift up all the later entries in this row */ \ 117 for ( ii=N; ii>=_i; ii-- ) { \ 118 rp[ii+1] = rp[ii]; \ 119 PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \ 120 } \ 121 if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar)); \ 122 rp[_i] = bcol; \ 123 ap[bs2*_i + bs*cidx + ridx] = value; \ 124 a_noinsert:; \ 125 ailen[brow] = nrow; \ 126 } 127 128 #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 129 { \ 130 \ 131 brow = row/bs; \ 132 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 133 rmax = bimax[brow]; nrow = bilen[brow]; \ 134 bcol = col/bs; \ 135 ridx = row % bs; cidx = col % bs; \ 136 low = 0; high = nrow; \ 137 while (high-low > 3) { \ 
138 t = (low+high)/2; \ 139 if (rp[t] > bcol) high = t; \ 140 else low = t; \ 141 } \ 142 for ( _i=low; _i<high; _i++ ) { \ 143 if (rp[_i] > bcol) break; \ 144 if (rp[_i] == bcol) { \ 145 bap = ap + bs2*_i + bs*cidx + ridx; \ 146 if (addv == ADD_VALUES) *bap += value; \ 147 else *bap = value; \ 148 goto b_noinsert; \ 149 } \ 150 } \ 151 if (b->nonew == 1) goto b_noinsert; \ 152 else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 153 if (nrow >= rmax) { \ 154 /* there is no extra room in row, therefore enlarge */ \ 155 int new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 156 Scalar *new_a; \ 157 \ 158 if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 159 \ 160 /* malloc new storage space */ \ 161 len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(b->mbs+1)*sizeof(int); \ 162 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 163 new_j = (int *) (new_a + bs2*new_nz); \ 164 new_i = new_j + new_nz; \ 165 \ 166 /* copy over old data into new slots */ \ 167 for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = bi[ii];} \ 168 for ( ii=brow+1; ii<b->mbs+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 169 PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int)); \ 170 len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \ 171 PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow, \ 172 len*sizeof(int)); \ 173 PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(Scalar)); \ 174 PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \ 175 PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \ 176 ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(Scalar)); \ 177 /* free up old matrix storage */ \ 178 PetscFree(b->a); \ 179 if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \ 180 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 181 b->singlemalloc = 1; \ 182 \ 183 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 184 rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \ 185 
PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \ 186 b->maxnz += bs2*CHUNKSIZE; \ 187 b->reallocs++; \ 188 b->nz++; \ 189 } \ 190 N = nrow++ - 1; \ 191 /* shift up all the later entries in this row */ \ 192 for ( ii=N; ii>=_i; ii-- ) { \ 193 rp[ii+1] = rp[ii]; \ 194 PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \ 195 } \ 196 if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar)); \ 197 rp[_i] = bcol; \ 198 ap[bs2*_i + bs*cidx + ridx] = value; \ 199 b_noinsert:; \ 200 bilen[brow] = nrow; \ 201 } 202 203 #undef __FUNC__ 204 #define __FUNC__ "MatSetValues_MPIBAIJ" 205 int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 206 { 207 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 208 Scalar value; 209 int ierr,i,j,row,col; 210 int roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ; 211 int rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs; 212 int cend_orig=baij->cend_bs,bs=baij->bs; 213 214 /* Some Variables required in the macro */ 215 Mat A = baij->A; 216 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) (A)->data; 217 int *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 218 Scalar *aa=a->a; 219 220 Mat B = baij->B; 221 Mat_SeqBAIJ *b = (Mat_SeqBAIJ *) (B)->data; 222 int *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 223 Scalar *ba=b->a; 224 225 int *rp,ii,nrow,_i,rmax,N,brow,bcol; 226 int low,high,t,ridx,cidx,bs2=a->bs2; 227 Scalar *ap,*bap; 228 229 PetscFunctionBegin; 230 for ( i=0; i<m; i++ ) { 231 if (im[i] < 0) continue; 232 #if defined(USE_PETSC_BOPT_g) 233 if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 234 #endif 235 if (im[i] >= rstart_orig && im[i] < rend_orig) { 236 row = im[i] - rstart_orig; 237 for ( j=0; j<n; j++ ) { 238 if (in[j] >= cstart_orig && in[j] < cend_orig){ 239 col = in[j] - cstart_orig; 240 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 241 MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 242 /* ierr = 
MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 243 } else if (in[j] < 0) continue; 244 #if defined(USE_PETSC_BOPT_g) 245 else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Col too large");} 246 #endif 247 else { 248 if (mat->was_assembled) { 249 if (!baij->colmap) { 250 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 251 } 252 #if defined (USE_CTABLE) 253 ierr = TableFind(baij->colmap, in[j]/bs + 1,&col); CHKERRQ(ierr); 254 col = col - 1 + in[j]%bs; 255 #else 256 col = baij->colmap[in[j]/bs] - 1 + in[j]%bs; 257 #endif 258 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 259 ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 260 col = in[j]; 261 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 262 B = baij->B; 263 b = (Mat_SeqBAIJ *) (B)->data; 264 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 265 ba=b->a; 266 } 267 } else col = in[j]; 268 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 269 MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 270 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 271 } 272 } 273 } else { 274 if ( !baij->donotstash) { 275 if (roworiented) { 276 ierr = StashValuesRoworiented_Private(&baij->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 277 } else { 278 ierr = StashValuesColumnoriented_Private(&baij->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 279 } 280 } 281 } 282 } 283 PetscFunctionReturn(0); 284 } 285 286 #undef __FUNC__ 287 #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ" 288 int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 289 { 290 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 291 Scalar *value,*barray=baij->barray; 292 int ierr,i,j,ii,jj,row,col; 293 int roworiented = baij->roworiented,rstart=baij->rstart ; 294 int rend=baij->rend,cstart=baij->cstart,stepval; 295 int cend=baij->cend,bs=baij->bs,bs2=baij->bs2; 296 297 if(!barray) { 298 baij->barray = barray = (Scalar*) 
PetscMalloc(bs2*sizeof(Scalar)); CHKPTRQ(barray); 299 } 300 301 if (roworiented) { 302 stepval = (n-1)*bs; 303 } else { 304 stepval = (m-1)*bs; 305 } 306 for ( i=0; i<m; i++ ) { 307 if (im[i] < 0) continue; 308 #if defined(USE_PETSC_BOPT_g) 309 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large, row %d max %d",im[i],baij->Mbs); 310 #endif 311 if (im[i] >= rstart && im[i] < rend) { 312 row = im[i] - rstart; 313 for ( j=0; j<n; j++ ) { 314 /* If NumCol = 1 then a copy is not required */ 315 if ((roworiented) && (n == 1)) { 316 barray = v + i*bs2; 317 } else if((!roworiented) && (m == 1)) { 318 barray = v + j*bs2; 319 } else { /* Here a copy is required */ 320 if (roworiented) { 321 value = v + i*(stepval+bs)*bs + j*bs; 322 } else { 323 value = v + j*(stepval+bs)*bs + i*bs; 324 } 325 for ( ii=0; ii<bs; ii++,value+=stepval ) { 326 for (jj=0; jj<bs; jj++ ) { 327 *barray++ = *value++; 328 } 329 } 330 barray -=bs2; 331 } 332 333 if (in[j] >= cstart && in[j] < cend){ 334 col = in[j] - cstart; 335 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 336 } 337 else if (in[j] < 0) continue; 338 #if defined(USE_PETSC_BOPT_g) 339 else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large, col %d max %d",in[j],baij->Nbs);} 340 #endif 341 else { 342 if (mat->was_assembled) { 343 if (!baij->colmap) { 344 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 345 } 346 347 #if defined(USE_PETSC_BOPT_g) 348 #if defined (USE_CTABLE) 349 { int data; 350 ierr = TableFind(baij->colmap,in[j]+1,&data); CHKERRQ(ierr); 351 if((data - 1) % bs) 352 {SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");} 353 } 354 #else 355 if ((baij->colmap[in[j]] - 1) % bs) {SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");} 356 #endif 357 #endif 358 #if defined (USE_CTABLE) 359 ierr = TableFind(baij->colmap,in[j]+1,&col); CHKERRQ(ierr); 360 col = (col - 1)/bs; 361 #else 362 col = (baij->colmap[in[j]] - 1)/bs; 363 #endif 364 if (col < 0 && 
!((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 365 ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 366 col = in[j]; 367 } 368 } 369 else col = in[j]; 370 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 371 } 372 } 373 } else { 374 if (!baij->donotstash) { 375 if (roworiented) { 376 ierr = StashValuesRoworientedBlocked_Private( 377 &baij->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 378 } else { 379 ierr = StashValuesColumnorientedBlocked_Private( 380 &baij->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 381 } 382 } 383 } 384 } 385 PetscFunctionReturn(0); 386 } 387 #define HASH_KEY 0.6180339887 388 /* #define HASH1(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 389 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp))) 390 /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 391 #undef __FUNC__ 392 #define __FUNC__ "MatSetValues_MPIBAIJ_HT" 393 int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 394 { 395 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 396 int ierr,i,j,row,col; 397 int roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ; 398 int rend_orig=baij->rend_bs,Nbs=baij->Nbs; 399 int h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx; 400 double tmp; 401 Scalar ** HD = baij->hd,value; 402 #if defined(USE_PETSC_BOPT_g) 403 int total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 404 #endif 405 406 PetscFunctionBegin; 407 408 for ( i=0; i<m; i++ ) { 409 #if defined(USE_PETSC_BOPT_g) 410 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 411 if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 412 #endif 413 row = im[i]; 414 if (row >= rstart_orig && row < rend_orig) { 415 for ( j=0; j<n; j++ ) { 416 col = in[j]; 417 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 418 /* Look up into the Hash Table */ 419 key = (row/bs)*Nbs+(col/bs)+1; 420 h1 = HASH(size,key,tmp); 421 422 423 idx = 
h1; 424 #if defined(USE_PETSC_BOPT_g) 425 insert_ct++; 426 total_ct++; 427 if (HT[idx] != key) { 428 for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 429 if (idx == size) { 430 for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 431 if (idx == h1) { 432 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 433 } 434 } 435 } 436 #else 437 if (HT[idx] != key) { 438 for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++); 439 if (idx == size) { 440 for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++); 441 if (idx == h1) { 442 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 443 } 444 } 445 } 446 #endif 447 /* A HASH table entry is found, so insert the values at the correct address */ 448 if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 449 else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 450 } 451 } else { 452 if (!baij->donotstash) { 453 if (roworiented) { 454 ierr = StashValuesRoworiented_Private(&baij->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 455 } else { 456 ierr = StashValuesColumnoriented_Private(&baij->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 457 } 458 } 459 } 460 } 461 #if defined(USE_PETSC_BOPT_g) 462 baij->ht_total_ct = total_ct; 463 baij->ht_insert_ct = insert_ct; 464 #endif 465 PetscFunctionReturn(0); 466 } 467 468 #undef __FUNC__ 469 #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ_HT" 470 int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 471 { 472 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 473 int ierr,i,j,ii,jj,row,col,k,l; 474 int roworiented = baij->roworiented,rstart=baij->rstart ; 475 int rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2; 476 int h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 477 double tmp; 478 Scalar ** HD = baij->hd,*value,*v_t,*baij_a; 479 #if defined(USE_PETSC_BOPT_g) 480 int total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 481 #endif 482 483 PetscFunctionBegin; 484 485 
if (roworiented) { 486 stepval = (n-1)*bs; 487 } else { 488 stepval = (m-1)*bs; 489 } 490 for ( i=0; i<m; i++ ) { 491 #if defined(USE_PETSC_BOPT_g) 492 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 493 if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 494 #endif 495 row = im[i]; 496 v_t = v + i*bs2; 497 if (row >= rstart && row < rend) { 498 for ( j=0; j<n; j++ ) { 499 col = in[j]; 500 501 /* Look up into the Hash Table */ 502 key = row*Nbs+col+1; 503 h1 = HASH(size,key,tmp); 504 505 idx = h1; 506 #if defined(USE_PETSC_BOPT_g) 507 total_ct++; 508 insert_ct++; 509 if (HT[idx] != key) { 510 for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 511 if (idx == size) { 512 for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 513 if (idx == h1) { 514 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 515 } 516 } 517 } 518 #else 519 if (HT[idx] != key) { 520 for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++); 521 if (idx == size) { 522 for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++); 523 if (idx == h1) { 524 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 525 } 526 } 527 } 528 #endif 529 baij_a = HD[idx]; 530 if (roworiented) { 531 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 532 /* value = v + (i*(stepval+bs)+j)*bs; */ 533 value = v_t; 534 v_t += bs; 535 if (addv == ADD_VALUES) { 536 for ( ii=0; ii<bs; ii++,value+=stepval) { 537 for ( jj=ii; jj<bs2; jj+=bs ) { 538 baij_a[jj] += *value++; 539 } 540 } 541 } else { 542 for ( ii=0; ii<bs; ii++,value+=stepval) { 543 for ( jj=ii; jj<bs2; jj+=bs ) { 544 baij_a[jj] = *value++; 545 } 546 } 547 } 548 } else { 549 value = v + j*(stepval+bs)*bs + i*bs; 550 if (addv == ADD_VALUES) { 551 for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) { 552 for ( jj=0; jj<bs; jj++ ) { 553 baij_a[jj] += *value++; 554 } 555 } 556 } else { 557 for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) { 558 for ( jj=0; jj<bs; jj++ ) { 
559 baij_a[jj] = *value++; 560 } 561 } 562 } 563 } 564 } 565 } else { 566 if (!baij->donotstash) { 567 if (roworiented ) { 568 row = im[i]*bs; 569 value = v + i*(stepval+bs)*bs; 570 for ( j=0; j<bs; j++,row++ ) { 571 for ( k=0; k<n; k++ ) { 572 for ( col=in[k]*bs,l=0; l<bs; l++,col++) { 573 ierr = StashValuesRoworiented_Private(&baij->stash,row,1,&col,value++);CHKERRQ(ierr); 574 } 575 } 576 } 577 } else { 578 for ( j=0; j<n; j++ ) { 579 value = v + j*(stepval+bs)*bs + i*bs; 580 col = in[j]*bs; 581 for ( k=0; k<bs; k++,col++,value+=stepval) { 582 for ( row = im[i]*bs,l=0; l<bs; l++,row++) { 583 ierr = StashValuesRoworiented_Private(&baij->stash,row,1,&col,value++);CHKERRQ(ierr); 584 } 585 } 586 } 587 } 588 } 589 } 590 } 591 #if defined(USE_PETSC_BOPT_g) 592 baij->ht_total_ct = total_ct; 593 baij->ht_insert_ct = insert_ct; 594 #endif 595 PetscFunctionReturn(0); 596 } 597 598 #undef __FUNC__ 599 #define __FUNC__ "MatGetValues_MPIBAIJ" 600 int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 601 { 602 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 603 int bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs; 604 int bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col,data; 605 606 PetscFunctionBegin; 607 for ( i=0; i<m; i++ ) { 608 if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 609 if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 610 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 611 row = idxm[i] - bsrstart; 612 for ( j=0; j<n; j++ ) { 613 if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column"); 614 if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large"); 615 if (idxn[j] >= bscstart && idxn[j] < bscend){ 616 col = idxn[j] - bscstart; 617 ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 618 } else { 619 if (!baij->colmap) { 620 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 621 } 622 #if defined 
(USE_CTABLE)
          ierr = TableFind(baij->colmap,idxn[j]/bs+1,&data); CHKERRQ(ierr);
          data --;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          /* colmap may hold a stale slot; confirm the block really maps
             to this global block column, otherwise the value is an
             unstored (zero) entry */
          if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
          }
        }
      }
    } else {
      /* fetching remote entries would require communication */
      SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatNorm_MPIBAIJ"
/*
   Computes a matrix norm.  The uniprocessor case is delegated to the
   sequential routine; in parallel only NORM_FROBENIUS is supported:
   sum of squared magnitudes over the diagonal (A) and off-diagonal (B)
   local parts, reduced with MPI_SUM, then square-rooted.
*/
int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data;
  int         ierr, i,bs2=baij->bs2;
  double      sum = 0.0;
  Scalar      *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,norm); CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      /* contribution of the diagonal part; nz counts BLOCKS, hence *bs2 */
      v = amat->a;
      for (i=0; i<amat->nz*bs2; i++ ) {
#if defined(USE_PETSC_COMPLEX)
        sum += PetscReal(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      /* contribution of the off-diagonal part */
      v = bmat->a;
      for (i=0; i<bmat->nz*bs2; i++ ) {
#if defined(USE_PETSC_COMPLEX)
        sum += PetscReal(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr  = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet");
    }
  }
  PetscFunctionReturn(0);
}


/*
  Creates the hash table, and sets the table
  This table is created only once.
  If new entries need to be added to the matrix
  then the hash table has to be destroyed and
  recreated.
689 */ 690 #undef __FUNC__ 691 #define __FUNC__ "MatCreateHashTable_MPIBAIJ_Private" 692 int MatCreateHashTable_MPIBAIJ_Private(Mat mat,double factor) 693 { 694 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 695 Mat A = baij->A, B=baij->B; 696 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data, *b=(Mat_SeqBAIJ *)B->data; 697 int i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 698 int size,bs2=baij->bs2,rstart=baij->rstart; 699 int cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs; 700 int *HT,key; 701 Scalar **HD; 702 double tmp; 703 #if defined(USE_PETSC_BOPT_g) 704 int ct=0,max=0; 705 #endif 706 707 PetscFunctionBegin; 708 baij->ht_size=(int)(factor*nz); 709 size = baij->ht_size; 710 711 if (baij->ht) { 712 PetscFunctionReturn(0); 713 } 714 715 /* Allocate Memory for Hash Table */ 716 baij->hd = (Scalar**)PetscMalloc((size)*(sizeof(int)+sizeof(Scalar*))+1); CHKPTRQ(baij->hd); 717 baij->ht = (int*)(baij->hd + size); 718 HD = baij->hd; 719 HT = baij->ht; 720 721 722 PetscMemzero(HD,size*(sizeof(int)+sizeof(Scalar*))); 723 724 725 /* Loop Over A */ 726 for ( i=0; i<a->mbs; i++ ) { 727 for ( j=ai[i]; j<ai[i+1]; j++ ) { 728 row = i+rstart; 729 col = aj[j]+cstart; 730 731 key = row*Nbs + col + 1; 732 h1 = HASH(size,key,tmp); 733 for ( k=0; k<size; k++ ){ 734 if (HT[(h1+k)%size] == 0.0) { 735 HT[(h1+k)%size] = key; 736 HD[(h1+k)%size] = a->a + j*bs2; 737 break; 738 #if defined(USE_PETSC_BOPT_g) 739 } else { 740 ct++; 741 #endif 742 } 743 } 744 #if defined(USE_PETSC_BOPT_g) 745 if (k> max) max = k; 746 #endif 747 } 748 } 749 /* Loop Over B */ 750 for ( i=0; i<b->mbs; i++ ) { 751 for ( j=bi[i]; j<bi[i+1]; j++ ) { 752 row = i+rstart; 753 col = garray[bj[j]]; 754 key = row*Nbs + col + 1; 755 h1 = HASH(size,key,tmp); 756 for ( k=0; k<size; k++ ){ 757 if (HT[(h1+k)%size] == 0.0) { 758 HT[(h1+k)%size] = key; 759 HD[(h1+k)%size] = b->a + j*bs2; 760 break; 761 #if defined(USE_PETSC_BOPT_g) 762 } else { 763 ct++; 764 #endif 765 } 766 } 767 #if 
defined(USE_PETSC_BOPT_g) 768 if (k> max) max = k; 769 #endif 770 } 771 } 772 773 /* Print Summary */ 774 #if defined(USE_PETSC_BOPT_g) 775 for ( i=0,j=0; i<size; i++) 776 if (HT[i]) {j++;} 777 PLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n", 778 (j== 0)? 0.0:((double)(ct+j))/j,max); 779 #endif 780 PetscFunctionReturn(0); 781 } 782 783 #undef __FUNC__ 784 #define __FUNC__ "MatAssemblyBegin_MPIBAIJ" 785 int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 786 { 787 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 788 int ierr,nstash,reallocs; 789 InsertMode addv; 790 791 PetscFunctionBegin; 792 if (baij->donotstash) { 793 PetscFunctionReturn(0); 794 } 795 796 /* make sure all processors are either in INSERTMODE or ADDMODE */ 797 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 798 if (addv == (ADD_VALUES|INSERT_VALUES)) { 799 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added"); 800 } 801 mat->insertmode = addv; /* in case this processor had no cache */ 802 803 ierr = StashScatterBegin_Private(&baij->stash,baij->rowners_bs); CHKERRQ(ierr); 804 ierr = StashScatterBegin_Private(&baij->bstash,baij->rowners); CHKERRQ(ierr); 805 ierr = StashGetInfo_Private(&baij->stash,&nstash,&reallocs); CHKERRQ(ierr); 806 PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Stash has %d entries, uses %d mallocs.\n", 807 nstash,reallocs); 808 ierr = StashGetInfo_Private(&baij->stash,&nstash,&reallocs); CHKERRQ(ierr); 809 PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %d entries, uses %d mallocs.\n", 810 nstash,reallocs); 811 PetscFunctionReturn(0); 812 } 813 814 #undef __FUNC__ 815 #define __FUNC__ "MatAssemblyEnd_MPIBAIJ" 816 int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 817 { 818 Mat_MPIBAIJ *baij=(Mat_MPIBAIJ *) mat->data; 819 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data; 820 int i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2; 821 int 
*row,*col,other_disassembled,r1,r2,r3; 822 Scalar *val; 823 InsertMode addv = mat->insertmode; 824 825 PetscFunctionBegin; 826 if (!baij->donotstash) { 827 while (1) { 828 ierr = StashScatterGetMesg_Private(&baij->stash,&n,&row,&col,&val,&flg); CHKERRQ(ierr); 829 if (!flg) break; 830 831 for ( i=0; i<n; ) { 832 /* Now identify the consecutive vals belonging to the same row */ 833 for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; } 834 if (j < n) ncols = j-i; 835 else ncols = n-i; 836 /* Now assemble all these values with a single function call */ 837 ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv); CHKERRQ(ierr); 838 i = j; 839 } 840 } 841 ierr = StashScatterEnd_Private(&baij->stash); CHKERRQ(ierr); 842 /* Now process the block-stash. Since the values are stashed column-oriented, 843 set the roworiented flag to column oriented, and after MatSetValues() 844 restore the original flags */ 845 r1 = baij->roworiented; 846 r2 = a->roworiented; 847 r3 = b->roworiented; 848 baij->roworiented = 0; 849 a->roworiented = 0; 850 b->roworiented = 0; 851 while (1) { 852 ierr = StashScatterGetMesg_Private(&baij->bstash,&n,&row,&col,&val,&flg); CHKERRQ(ierr); 853 if (!flg) break; 854 855 for ( i=0; i<n; ) { 856 /* Now identify the consecutive vals belonging to the same row */ 857 for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; } 858 if (j < n) ncols = j-i; 859 else ncols = n-i; 860 ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv); CHKERRQ(ierr); 861 i = j; 862 } 863 } 864 ierr = StashScatterEnd_Private(&baij->bstash); CHKERRQ(ierr); 865 baij->roworiented = r1; 866 a->roworiented = r2; 867 b->roworiented = r3; 868 } 869 870 ierr = MatAssemblyBegin(baij->A,mode); CHKERRQ(ierr); 871 ierr = MatAssemblyEnd(baij->A,mode); CHKERRQ(ierr); 872 873 /* determine if any processor has disassembled, if so we must 874 also disassemble ourselfs, in order that we may reassemble. 
*/ 875 /* 876 if nonzero structure of submatrix B cannot change then we know that 877 no processor disassembled thus we can skip this stuff 878 */ 879 if (!((Mat_SeqBAIJ*) baij->B->data)->nonew) { 880 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 881 if (mat->was_assembled && !other_disassembled) { 882 ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 883 } 884 } 885 886 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 887 ierr = MatSetUpMultiply_MPIBAIJ(mat); CHKERRQ(ierr); 888 } 889 ierr = MatAssemblyBegin(baij->B,mode); CHKERRQ(ierr); 890 ierr = MatAssemblyEnd(baij->B,mode); CHKERRQ(ierr); 891 892 #if defined(USE_PETSC_BOPT_g) 893 if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 894 PLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n", 895 ((double)baij->ht_total_ct)/baij->ht_insert_ct); 896 baij->ht_total_ct = 0; 897 baij->ht_insert_ct = 0; 898 } 899 #endif 900 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 901 ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact); CHKERRQ(ierr); 902 mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 903 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 904 } 905 906 if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;} 907 PetscFunctionReturn(0); 908 } 909 910 #undef __FUNC__ 911 #define __FUNC__ "MatView_MPIBAIJ_Binary" 912 static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer) 913 { 914 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 915 int ierr; 916 917 PetscFunctionBegin; 918 if (baij->size == 1) { 919 ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 920 } else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported"); 921 PetscFunctionReturn(0); 922 } 923 924 #undef __FUNC__ 925 #define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworSocket" 926 static int MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer) 927 { 928 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 929 int ierr, 
format,bs = baij->bs, size = baij->size, rank = baij->rank; 930 FILE *fd; 931 ViewerType vtype; 932 933 PetscFunctionBegin; 934 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 935 if (PetscTypeCompare(vtype,ASCII_VIEWER)) { 936 ierr = ViewerGetFormat(viewer,&format); CHKERRQ(ierr); 937 if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 938 MatInfo info; 939 MPI_Comm_rank(mat->comm,&rank); 940 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 941 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 942 PetscSequentialPhaseBegin(mat->comm,1); 943 fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n", 944 rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs, 945 baij->bs,(int)info.memory); 946 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info); 947 fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs); 948 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info); 949 fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs); 950 fflush(fd); 951 PetscSequentialPhaseEnd(mat->comm,1); 952 ierr = VecScatterView(baij->Mvctx,viewer); CHKERRQ(ierr); 953 PetscFunctionReturn(0); 954 } else if (format == VIEWER_FORMAT_ASCII_INFO) { 955 PetscPrintf(mat->comm," block size is %d\n",bs); 956 PetscFunctionReturn(0); 957 } 958 } 959 960 if (PetscTypeCompare(vtype,DRAW_VIEWER)) { 961 Draw draw; 962 PetscTruth isnull; 963 ierr = ViewerDrawGetDraw(viewer,0,&draw); CHKERRQ(ierr); 964 ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 965 } 966 967 if (size == 1) { 968 ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 969 } else { 970 /* assemble the entire matrix onto first processor. 
*/ 971 Mat A; 972 Mat_SeqBAIJ *Aloc; 973 int M = baij->M, N = baij->N,*ai,*aj,col,i,j,k,*rvals; 974 int mbs = baij->mbs; 975 Scalar *a; 976 977 if (!rank) { 978 ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 979 } else { 980 ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 981 } 982 PLogObjectParent(mat,A); 983 984 /* copy over the A part */ 985 Aloc = (Mat_SeqBAIJ*) baij->A->data; 986 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 987 rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals); 988 989 for ( i=0; i<mbs; i++ ) { 990 rvals[0] = bs*(baij->rstart + i); 991 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 992 for ( j=ai[i]; j<ai[i+1]; j++ ) { 993 col = (baij->cstart+aj[j])*bs; 994 for (k=0; k<bs; k++ ) { 995 ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 996 col++; a += bs; 997 } 998 } 999 } 1000 /* copy over the B part */ 1001 Aloc = (Mat_SeqBAIJ*) baij->B->data; 1002 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1003 for ( i=0; i<mbs; i++ ) { 1004 rvals[0] = bs*(baij->rstart + i); 1005 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 1006 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1007 col = baij->garray[aj[j]]*bs; 1008 for (k=0; k<bs; k++ ) { 1009 ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1010 col++; a += bs; 1011 } 1012 } 1013 } 1014 PetscFree(rvals); 1015 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1016 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1017 /* 1018 Everyone has to call to draw the matrix since the graphics waits are 1019 synchronized across all processors that share the Draw object 1020 */ 1021 if (!rank || PetscTypeCompare(vtype,DRAW_VIEWER)) { 1022 ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 1023 } 1024 ierr = MatDestroy(A); CHKERRQ(ierr); 1025 } 1026 PetscFunctionReturn(0); 1027 } 1028 1029 1030 1031 #undef __FUNC__ 1032 #define __FUNC__ 
"MatView_MPIBAIJ" 1033 int MatView_MPIBAIJ(Mat mat,Viewer viewer) 1034 { 1035 int ierr; 1036 ViewerType vtype; 1037 1038 PetscFunctionBegin; 1039 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 1040 if (PetscTypeCompare(vtype,ASCII_VIEWER) || PetscTypeCompare(vtype,DRAW_VIEWER) || 1041 PetscTypeCompare(vtype,SOCKET_VIEWER)) { 1042 ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer); CHKERRQ(ierr); 1043 } else if (PetscTypeCompare(vtype,BINARY_VIEWER)) { 1044 ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1045 } else { 1046 SETERRQ(1,1,"Viewer type not supported by PETSc object"); 1047 } 1048 PetscFunctionReturn(0); 1049 } 1050 1051 #undef __FUNC__ 1052 #define __FUNC__ "MatDestroy_MPIBAIJ" 1053 int MatDestroy_MPIBAIJ(Mat mat) 1054 { 1055 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1056 int ierr; 1057 1058 PetscFunctionBegin; 1059 if (--mat->refct > 0) PetscFunctionReturn(0); 1060 1061 if (mat->mapping) { 1062 ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr); 1063 } 1064 if (mat->bmapping) { 1065 ierr = ISLocalToGlobalMappingDestroy(mat->bmapping); CHKERRQ(ierr); 1066 } 1067 if (mat->rmap) { 1068 ierr = MapDestroy(mat->rmap);CHKERRQ(ierr); 1069 } 1070 if (mat->cmap) { 1071 ierr = MapDestroy(mat->cmap);CHKERRQ(ierr); 1072 } 1073 #if defined(USE_PETSC_LOG) 1074 PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",baij->M,baij->N); 1075 #endif 1076 1077 ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr); 1078 ierr = StashDestroy_Private(&baij->bstash); CHKERRQ(ierr); 1079 PetscFree(baij->rowners); 1080 ierr = MatDestroy(baij->A); CHKERRQ(ierr); 1081 ierr = MatDestroy(baij->B); CHKERRQ(ierr); 1082 #if defined (USE_CTABLE) 1083 if (baij->colmap) TableDelete(baij->colmap); 1084 #else 1085 if (baij->colmap) PetscFree(baij->colmap); 1086 #endif 1087 if (baij->garray) PetscFree(baij->garray); 1088 if (baij->lvec) VecDestroy(baij->lvec); 1089 if (baij->Mvctx) VecScatterDestroy(baij->Mvctx); 1090 if (baij->rowvalues) 
PetscFree(baij->rowvalues); 1091 if (baij->barray) PetscFree(baij->barray); 1092 if (baij->hd) PetscFree(baij->hd); 1093 PetscFree(baij); 1094 PLogObjectDestroy(mat); 1095 PetscHeaderDestroy(mat); 1096 PetscFunctionReturn(0); 1097 } 1098 1099 #undef __FUNC__ 1100 #define __FUNC__ "MatMult_MPIBAIJ" 1101 int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1102 { 1103 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1104 int ierr, nt; 1105 1106 PetscFunctionBegin; 1107 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1108 if (nt != a->n) { 1109 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx"); 1110 } 1111 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1112 if (nt != a->m) { 1113 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible parition of A and yy"); 1114 } 1115 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1116 ierr = (*a->A->ops->mult)(a->A,xx,yy); CHKERRQ(ierr); 1117 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1118 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 1119 ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1120 PetscFunctionReturn(0); 1121 } 1122 1123 #undef __FUNC__ 1124 #define __FUNC__ "MatMultAdd_MPIBAIJ" 1125 int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1126 { 1127 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1128 int ierr; 1129 1130 PetscFunctionBegin; 1131 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1132 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 1133 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1134 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 1135 PetscFunctionReturn(0); 1136 } 1137 1138 #undef __FUNC__ 1139 #define __FUNC__ "MatMultTrans_MPIBAIJ" 1140 int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy) 1141 { 1142 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1143 int ierr; 1144 1145 
PetscFunctionBegin; 1146 /* do nondiagonal part */ 1147 ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 1148 /* send it on its way */ 1149 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1150 /* do local part */ 1151 ierr = (*a->A->ops->multtrans)(a->A,xx,yy); CHKERRQ(ierr); 1152 /* receive remote parts: note this assumes the values are not actually */ 1153 /* inserted in yy until the next line, which is true for my implementation*/ 1154 /* but is not perhaps always true. */ 1155 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1156 PetscFunctionReturn(0); 1157 } 1158 1159 #undef __FUNC__ 1160 #define __FUNC__ "MatMultTransAdd_MPIBAIJ" 1161 int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1162 { 1163 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1164 int ierr; 1165 1166 PetscFunctionBegin; 1167 /* do nondiagonal part */ 1168 ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 1169 /* send it on its way */ 1170 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 1171 /* do local part */ 1172 ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 1173 /* receive remote parts: note this assumes the values are not actually */ 1174 /* inserted in yy until the next line, which is true for my implementation*/ 1175 /* but is not perhaps always true. 
*/ 1176 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 1177 PetscFunctionReturn(0); 1178 } 1179 1180 /* 1181 This only works correctly for square matrices where the subblock A->A is the 1182 diagonal block 1183 */ 1184 #undef __FUNC__ 1185 #define __FUNC__ "MatGetDiagonal_MPIBAIJ" 1186 int MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1187 { 1188 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1189 int ierr; 1190 1191 PetscFunctionBegin; 1192 if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block"); 1193 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1194 PetscFunctionReturn(0); 1195 } 1196 1197 #undef __FUNC__ 1198 #define __FUNC__ "MatScale_MPIBAIJ" 1199 int MatScale_MPIBAIJ(Scalar *aa,Mat A) 1200 { 1201 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1202 int ierr; 1203 1204 PetscFunctionBegin; 1205 ierr = MatScale(aa,a->A); CHKERRQ(ierr); 1206 ierr = MatScale(aa,a->B); CHKERRQ(ierr); 1207 PetscFunctionReturn(0); 1208 } 1209 1210 #undef __FUNC__ 1211 #define __FUNC__ "MatGetSize_MPIBAIJ" 1212 int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n) 1213 { 1214 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1215 1216 PetscFunctionBegin; 1217 if (m) *m = mat->M; 1218 if (n) *n = mat->N; 1219 PetscFunctionReturn(0); 1220 } 1221 1222 #undef __FUNC__ 1223 #define __FUNC__ "MatGetLocalSize_MPIBAIJ" 1224 int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n) 1225 { 1226 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1227 1228 PetscFunctionBegin; 1229 *m = mat->m; *n = mat->n; 1230 PetscFunctionReturn(0); 1231 } 1232 1233 #undef __FUNC__ 1234 #define __FUNC__ "MatGetOwnershipRange_MPIBAIJ" 1235 int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n) 1236 { 1237 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1238 1239 PetscFunctionBegin; 1240 *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs; 1241 PetscFunctionReturn(0); 1242 } 1243 1244 #undef __FUNC__ 1245 #define __FUNC__ "MatGetRow_MPIBAIJ" 1246 int 
MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 1247 { 1248 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1249 Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 1250 int bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB; 1251 int nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs; 1252 int *cmap, *idx_p,cstart = mat->cstart; 1253 1254 PetscFunctionBegin; 1255 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active"); 1256 mat->getrowactive = PETSC_TRUE; 1257 1258 if (!mat->rowvalues && (idx || v)) { 1259 /* 1260 allocate enough space to hold information from the longest row. 1261 */ 1262 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data; 1263 int max = 1,mbs = mat->mbs,tmp; 1264 for ( i=0; i<mbs; i++ ) { 1265 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1266 if (max < tmp) { max = tmp; } 1267 } 1268 mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar))); 1269 CHKPTRQ(mat->rowvalues); 1270 mat->rowindices = (int *) (mat->rowvalues + max*bs2); 1271 } 1272 1273 if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,0,"Only local rows") 1274 lrow = row - brstart; 1275 1276 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1277 if (!v) {pvA = 0; pvB = 0;} 1278 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1279 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1280 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1281 nztot = nzA + nzB; 1282 1283 cmap = mat->garray; 1284 if (v || idx) { 1285 if (nztot) { 1286 /* Sort by increasing column numbers, assuming A and B already sorted */ 1287 int imark = -1; 1288 if (v) { 1289 *v = v_p = mat->rowvalues; 1290 for ( i=0; i<nzB; i++ ) { 1291 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1292 else break; 1293 } 1294 imark = i; 1295 for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i]; 1296 for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = 
vworkB[i]; 1297 } 1298 if (idx) { 1299 *idx = idx_p = mat->rowindices; 1300 if (imark > -1) { 1301 for ( i=0; i<imark; i++ ) { 1302 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1303 } 1304 } else { 1305 for ( i=0; i<nzB; i++ ) { 1306 if (cmap[cworkB[i]/bs] < cstart) 1307 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1308 else break; 1309 } 1310 imark = i; 1311 } 1312 for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart*bs + cworkA[i]; 1313 for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1314 } 1315 } else { 1316 if (idx) *idx = 0; 1317 if (v) *v = 0; 1318 } 1319 } 1320 *nz = nztot; 1321 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1322 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1323 PetscFunctionReturn(0); 1324 } 1325 1326 #undef __FUNC__ 1327 #define __FUNC__ "MatRestoreRow_MPIBAIJ" 1328 int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1329 { 1330 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1331 1332 PetscFunctionBegin; 1333 if (baij->getrowactive == PETSC_FALSE) { 1334 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called"); 1335 } 1336 baij->getrowactive = PETSC_FALSE; 1337 PetscFunctionReturn(0); 1338 } 1339 1340 #undef __FUNC__ 1341 #define __FUNC__ "MatGetBlockSize_MPIBAIJ" 1342 int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs) 1343 { 1344 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1345 1346 PetscFunctionBegin; 1347 *bs = baij->bs; 1348 PetscFunctionReturn(0); 1349 } 1350 1351 #undef __FUNC__ 1352 #define __FUNC__ "MatZeroEntries_MPIBAIJ" 1353 int MatZeroEntries_MPIBAIJ(Mat A) 1354 { 1355 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 1356 int ierr; 1357 1358 PetscFunctionBegin; 1359 ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 1360 ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 1361 PetscFunctionReturn(0); 1362 } 1363 1364 #undef __FUNC__ 1365 #define __FUNC__ "MatGetInfo_MPIBAIJ" 1366 int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType 
flag,MatInfo *info) 1367 { 1368 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) matin->data; 1369 Mat A = a->A, B = a->B; 1370 int ierr; 1371 double isend[5], irecv[5]; 1372 1373 PetscFunctionBegin; 1374 info->block_size = (double)a->bs; 1375 ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr); 1376 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1377 isend[3] = info->memory; isend[4] = info->mallocs; 1378 ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr); 1379 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1380 isend[3] += info->memory; isend[4] += info->mallocs; 1381 if (flag == MAT_LOCAL) { 1382 info->nz_used = isend[0]; 1383 info->nz_allocated = isend[1]; 1384 info->nz_unneeded = isend[2]; 1385 info->memory = isend[3]; 1386 info->mallocs = isend[4]; 1387 } else if (flag == MAT_GLOBAL_MAX) { 1388 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr); 1389 info->nz_used = irecv[0]; 1390 info->nz_allocated = irecv[1]; 1391 info->nz_unneeded = irecv[2]; 1392 info->memory = irecv[3]; 1393 info->mallocs = irecv[4]; 1394 } else if (flag == MAT_GLOBAL_SUM) { 1395 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr); 1396 info->nz_used = irecv[0]; 1397 info->nz_allocated = irecv[1]; 1398 info->nz_unneeded = irecv[2]; 1399 info->memory = irecv[3]; 1400 info->mallocs = irecv[4]; 1401 } else { 1402 SETERRQ1(1,1,"Unknown MatInfoType argument %d",flag); 1403 } 1404 info->rows_global = (double)a->M; 1405 info->columns_global = (double)a->N; 1406 info->rows_local = (double)a->m; 1407 info->columns_local = (double)a->N; 1408 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1409 info->fill_ratio_needed = 0; 1410 info->factor_mallocs = 0; 1411 PetscFunctionReturn(0); 1412 } 1413 1414 #undef __FUNC__ 1415 #define __FUNC__ "MatSetOption_MPIBAIJ" 1416 int MatSetOption_MPIBAIJ(Mat A,MatOption op) 1417 { 1418 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 
1419 int ierr; 1420 1421 PetscFunctionBegin; 1422 if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 1423 op == MAT_YES_NEW_NONZERO_LOCATIONS || 1424 op == MAT_COLUMNS_UNSORTED || 1425 op == MAT_COLUMNS_SORTED || 1426 op == MAT_NEW_NONZERO_ALLOCATION_ERR || 1427 op == MAT_NEW_NONZERO_LOCATION_ERR) { 1428 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1429 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1430 } else if (op == MAT_ROW_ORIENTED) { 1431 a->roworiented = 1; 1432 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1433 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1434 } else if (op == MAT_ROWS_SORTED || 1435 op == MAT_ROWS_UNSORTED || 1436 op == MAT_SYMMETRIC || 1437 op == MAT_STRUCTURALLY_SYMMETRIC || 1438 op == MAT_YES_NEW_DIAGONALS || 1439 op == MAT_USE_HASH_TABLE) { 1440 PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 1441 } else if (op == MAT_COLUMN_ORIENTED) { 1442 a->roworiented = 0; 1443 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1444 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1445 } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 1446 a->donotstash = 1; 1447 } else if (op == MAT_NO_NEW_DIAGONALS) { 1448 SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 1449 } else if (op == MAT_USE_HASH_TABLE) { 1450 a->ht_flag = 1; 1451 } else { 1452 SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 1453 } 1454 PetscFunctionReturn(0); 1455 } 1456 1457 #undef __FUNC__ 1458 #define __FUNC__ "MatTranspose_MPIBAIJ(" 1459 int MatTranspose_MPIBAIJ(Mat A,Mat *matout) 1460 { 1461 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data; 1462 Mat_SeqBAIJ *Aloc; 1463 Mat B; 1464 int ierr,M=baij->M,N=baij->N,*ai,*aj,i,*rvals,j,k,col; 1465 int bs=baij->bs,mbs=baij->mbs; 1466 Scalar *a; 1467 1468 PetscFunctionBegin; 1469 if (matout == PETSC_NULL && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place"); 1470 ierr = MatCreateMPIBAIJ(A->comm,baij->bs,baij->n,baij->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B); 1471 CHKERRQ(ierr); 1472 1473 /* copy over the A part */ 1474 Aloc = (Mat_SeqBAIJ*) 
baij->A->data; 1475 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1476 rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals); 1477 1478 for ( i=0; i<mbs; i++ ) { 1479 rvals[0] = bs*(baij->rstart + i); 1480 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 1481 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1482 col = (baij->cstart+aj[j])*bs; 1483 for (k=0; k<bs; k++ ) { 1484 ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1485 col++; a += bs; 1486 } 1487 } 1488 } 1489 /* copy over the B part */ 1490 Aloc = (Mat_SeqBAIJ*) baij->B->data; 1491 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1492 for ( i=0; i<mbs; i++ ) { 1493 rvals[0] = bs*(baij->rstart + i); 1494 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 1495 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1496 col = baij->garray[aj[j]]*bs; 1497 for (k=0; k<bs; k++ ) { 1498 ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1499 col++; a += bs; 1500 } 1501 } 1502 } 1503 PetscFree(rvals); 1504 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1505 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1506 1507 if (matout != PETSC_NULL) { 1508 *matout = B; 1509 } else { 1510 PetscOps *Abops; 1511 MatOps Aops; 1512 1513 /* This isn't really an in-place transpose .... but free data structures from baij */ 1514 PetscFree(baij->rowners); 1515 ierr = MatDestroy(baij->A); CHKERRQ(ierr); 1516 ierr = MatDestroy(baij->B); CHKERRQ(ierr); 1517 #if defined (USE_CTABLE) 1518 if (baij->colmap) TableDelete(baij->colmap); 1519 #else 1520 if (baij->colmap) PetscFree(baij->colmap); 1521 #endif 1522 if (baij->garray) PetscFree(baij->garray); 1523 if (baij->lvec) VecDestroy(baij->lvec); 1524 if (baij->Mvctx) VecScatterDestroy(baij->Mvctx); 1525 PetscFree(baij); 1526 1527 /* 1528 This is horrible, horrible code. We need to keep the 1529 A pointers for the bops and ops but copy everything 1530 else from C. 
1531 */ 1532 Abops = A->bops; 1533 Aops = A->ops; 1534 PetscMemcpy(A,B,sizeof(struct _p_Mat)); 1535 A->bops = Abops; 1536 A->ops = Aops; 1537 1538 PetscHeaderDestroy(B); 1539 } 1540 PetscFunctionReturn(0); 1541 } 1542 1543 #undef __FUNC__ 1544 #define __FUNC__ "MatDiagonalScale_MPIBAIJ" 1545 int MatDiagonalScale_MPIBAIJ(Mat A,Vec ll,Vec rr) 1546 { 1547 Mat a = ((Mat_MPIBAIJ *) A->data)->A; 1548 Mat b = ((Mat_MPIBAIJ *) A->data)->B; 1549 int ierr,s1,s2,s3; 1550 1551 PetscFunctionBegin; 1552 if (ll) { 1553 ierr = VecGetLocalSize(ll,&s1); CHKERRQ(ierr); 1554 ierr = MatGetLocalSize(A,&s2,&s3); CHKERRQ(ierr); 1555 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"non-conforming local sizes"); 1556 ierr = MatDiagonalScale(a,ll,0); CHKERRQ(ierr); 1557 ierr = MatDiagonalScale(b,ll,0); CHKERRQ(ierr); 1558 } 1559 if (rr) SETERRQ(PETSC_ERR_SUP,0,"not supported for right vector"); 1560 PetscFunctionReturn(0); 1561 } 1562 1563 #undef __FUNC__ 1564 #define __FUNC__ "MatZeroRows_MPIBAIJ" 1565 int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag) 1566 { 1567 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 1568 int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 1569 int *procs,*nprocs,j,found,idx,nsends,*work,row; 1570 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 1571 int *rvalues,tag = A->tag,count,base,slen,n,*source; 1572 int *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs; 1573 MPI_Comm comm = A->comm; 1574 MPI_Request *send_waits,*recv_waits; 1575 MPI_Status recv_status,*send_status; 1576 IS istmp; 1577 1578 PetscFunctionBegin; 1579 ierr = ISGetSize(is,&N); CHKERRQ(ierr); 1580 ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 1581 1582 /* first count number of contributors to each processor */ 1583 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 1584 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 1585 owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 1586 for ( i=0; i<N; i++ ) { 1587 idx = rows[i]; 1588 
found = 0; 1589 for ( j=0; j<size; j++ ) { 1590 if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 1591 nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 1592 } 1593 } 1594 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range"); 1595 } 1596 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 1597 1598 /* inform other processors of number of messages and max length*/ 1599 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 1600 ierr = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 1601 nrecvs = work[rank]; 1602 ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 1603 nmax = work[rank]; 1604 PetscFree(work); 1605 1606 /* post receives: */ 1607 rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); CHKPTRQ(rvalues); 1608 recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 1609 for ( i=0; i<nrecvs; i++ ) { 1610 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 1611 } 1612 1613 /* do sends: 1614 1) starts[i] gives the starting index in svalues for stuff going to 1615 the ith processor 1616 */ 1617 svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 1618 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 1619 starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 1620 starts[0] = 0; 1621 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 1622 for ( i=0; i<N; i++ ) { 1623 svalues[starts[owner[i]]++] = rows[i]; 1624 } 1625 ISRestoreIndices(is,&rows); 1626 1627 starts[0] = 0; 1628 for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 1629 count = 0; 1630 for ( i=0; i<size; i++ ) { 1631 if (procs[i]) { 1632 ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1633 } 1634 } 1635 PetscFree(starts); 1636 1637 base = owners[rank]*bs; 1638 
1639 /* wait on receives */ 1640 lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 1641 source = lens + nrecvs; 1642 count = nrecvs; slen = 0; 1643 while (count) { 1644 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1645 /* unpack receives into our local space */ 1646 ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 1647 source[imdex] = recv_status.MPI_SOURCE; 1648 lens[imdex] = n; 1649 slen += n; 1650 count--; 1651 } 1652 PetscFree(recv_waits); 1653 1654 /* move the data into the send scatter */ 1655 lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 1656 count = 0; 1657 for ( i=0; i<nrecvs; i++ ) { 1658 values = rvalues + i*nmax; 1659 for ( j=0; j<lens[i]; j++ ) { 1660 lrows[count++] = values[j] - base; 1661 } 1662 } 1663 PetscFree(rvalues); PetscFree(lens); 1664 PetscFree(owner); PetscFree(nprocs); 1665 1666 /* actually zap the local rows */ 1667 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 1668 PLogObjectParent(A,istmp); 1669 1670 /* 1671 Zero the required rows. If the "diagonal block" of the matrix 1672 is square and the user wishes to set the diagonal we use seperate 1673 code so that MatSetValues() is not called for each diagonal allocating 1674 new memory, thus calling lots of mallocs and slowing things down. 
1675 1676 Contributed by: Mathew Knepley 1677 */ 1678 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1679 ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 1680 if (diag && (l->A->M == l->A->N)) { 1681 ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 1682 } else if (diag) { 1683 ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr); 1684 if (((Mat_SeqBAIJ*)l->A->data)->nonew) { 1685 SETERRQ(PETSC_ERR_SUP,0,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1686 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1687 } 1688 for ( i = 0; i < slen; i++ ) { 1689 row = lrows[i] + rstart_bs; 1690 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 1691 } 1692 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1693 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1694 } else { 1695 ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr); 1696 } 1697 1698 ierr = ISDestroy(istmp); CHKERRQ(ierr); 1699 PetscFree(lrows); 1700 1701 /* wait on sends */ 1702 if (nsends) { 1703 send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 1704 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1705 PetscFree(send_status); 1706 } 1707 PetscFree(send_waits); PetscFree(svalues); 1708 1709 PetscFunctionReturn(0); 1710 } 1711 1712 #undef __FUNC__ 1713 #define __FUNC__ "MatPrintHelp_MPIBAIJ" 1714 int MatPrintHelp_MPIBAIJ(Mat A) 1715 { 1716 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 1717 MPI_Comm comm = A->comm; 1718 static int called = 0; 1719 int ierr; 1720 1721 PetscFunctionBegin; 1722 if (!a->rank) { 1723 ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr); 1724 } 1725 if (called) {PetscFunctionReturn(0);} else called = 1; 1726 (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n"); 1727 (*PetscHelpPrintf)(comm," -mat_use_hash_table <factor>: Use hashtable for efficient matrix 
assembly\n"); 1728 PetscFunctionReturn(0); 1729 } 1730 1731 #undef __FUNC__ 1732 #define __FUNC__ "MatSetUnfactored_MPIBAIJ" 1733 int MatSetUnfactored_MPIBAIJ(Mat A) 1734 { 1735 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 1736 int ierr; 1737 1738 PetscFunctionBegin; 1739 ierr = MatSetUnfactored(a->A); CHKERRQ(ierr); 1740 PetscFunctionReturn(0); 1741 } 1742 1743 static int MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *); 1744 1745 /* -------------------------------------------------------------------*/ 1746 static struct _MatOps MatOps_Values = { 1747 MatSetValues_MPIBAIJ, 1748 MatGetRow_MPIBAIJ, 1749 MatRestoreRow_MPIBAIJ, 1750 MatMult_MPIBAIJ, 1751 MatMultAdd_MPIBAIJ, 1752 MatMultTrans_MPIBAIJ, 1753 MatMultTransAdd_MPIBAIJ, 1754 0, 1755 0, 1756 0, 1757 0, 1758 0, 1759 0, 1760 0, 1761 MatTranspose_MPIBAIJ, 1762 MatGetInfo_MPIBAIJ, 1763 0, 1764 MatGetDiagonal_MPIBAIJ, 1765 MatDiagonalScale_MPIBAIJ, 1766 MatNorm_MPIBAIJ, 1767 MatAssemblyBegin_MPIBAIJ, 1768 MatAssemblyEnd_MPIBAIJ, 1769 0, 1770 MatSetOption_MPIBAIJ, 1771 MatZeroEntries_MPIBAIJ, 1772 MatZeroRows_MPIBAIJ, 1773 0, 1774 0, 1775 0, 1776 0, 1777 MatGetSize_MPIBAIJ, 1778 MatGetLocalSize_MPIBAIJ, 1779 MatGetOwnershipRange_MPIBAIJ, 1780 0, 1781 0, 1782 0, 1783 0, 1784 MatDuplicate_MPIBAIJ, 1785 0, 1786 0, 1787 0, 1788 0, 1789 0, 1790 MatGetSubMatrices_MPIBAIJ, 1791 MatIncreaseOverlap_MPIBAIJ, 1792 MatGetValues_MPIBAIJ, 1793 0, 1794 MatPrintHelp_MPIBAIJ, 1795 MatScale_MPIBAIJ, 1796 0, 1797 0, 1798 0, 1799 MatGetBlockSize_MPIBAIJ, 1800 0, 1801 0, 1802 0, 1803 0, 1804 0, 1805 0, 1806 MatSetUnfactored_MPIBAIJ, 1807 0, 1808 MatSetValuesBlocked_MPIBAIJ, 1809 0, 1810 0, 1811 0, 1812 MatGetMaps_Petsc}; 1813 1814 1815 EXTERN_C_BEGIN 1816 #undef __FUNC__ 1817 #define __FUNC__ "MatGetDiagonalBlock_MPIBAIJ" 1818 int MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 1819 { 1820 PetscFunctionBegin; 1821 *a = ((Mat_MPIBAIJ *)A->data)->A; 1822 *iscopy = PETSC_FALSE; 1823 PetscFunctionReturn(0); 1824 } 
1825 EXTERN_C_END 1826 1827 #undef __FUNC__ 1828 #define __FUNC__ "MatCreateMPIBAIJ" 1829 /*@C 1830 MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 1831 (block compressed row). For good matrix assembly performance 1832 the user should preallocate the matrix storage by setting the parameters 1833 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1834 performance can be increased by more than a factor of 50. 1835 1836 Collective on MPI_Comm 1837 1838 Input Parameters: 1839 + comm - MPI communicator 1840 . bs - size of blockk 1841 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1842 This value should be the same as the local size used in creating the 1843 y vector for the matrix-vector product y = Ax. 1844 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1845 This value should be the same as the local size used in creating the 1846 x vector for the matrix-vector product y = Ax. 1847 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 1848 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 1849 . d_nz - number of block nonzeros per block row in diagonal portion of local 1850 submatrix (same for all local rows) 1851 . d_nzz - array containing the number of block nonzeros in the various block rows 1852 of the in diagonal portion of the local (possibly different for each block 1853 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 1854 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1855 submatrix (same for all local rows). 1856 - o_nzz - array containing the number of nonzeros in the various block rows of the 1857 off-diagonal portion of the local submatrix (possibly different for 1858 each block row) or PETSC_NULL. 1859 1860 Output Parameter: 1861 . A - the matrix 1862 1863 Options Database Keys: 1864 . 
-mat_no_unroll - uses code that does not unroll the loops in the 1865 block calculations (much slower) 1866 . -mat_block_size - size of the blocks to use 1867 . -mat_mpi - use the parallel matrix data structures even on one processor 1868 (defaults to using SeqBAIJ format on one processor) 1869 1870 Notes: 1871 The user MUST specify either the local or global matrix dimensions 1872 (possibly both). 1873 1874 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1875 than it must be used on all processors that share the object for that argument. 1876 1877 Storage Information: 1878 For a square global matrix we define each processor's diagonal portion 1879 to be its local rows and the corresponding columns (a square submatrix); 1880 each processor's off-diagonal portion encompasses the remainder of the 1881 local matrix (a rectangular submatrix). 1882 1883 The user can specify preallocated storage for the diagonal part of 1884 the local submatrix with either d_nz or d_nnz (not both). Set 1885 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1886 memory allocation. Likewise, specify preallocated storage for the 1887 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1888 1889 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1890 the figure below we depict these three local rows and all columns (0-11). 1891 1892 .vb 1893 0 1 2 3 4 5 6 7 8 9 10 11 1894 ------------------- 1895 row 3 | o o o d d d o o o o o o 1896 row 4 | o o o d d d o o o o o o 1897 row 5 | o o o d d d o o o o o o 1898 ------------------- 1899 .ve 1900 1901 Thus, any entries in the d locations are stored in the d (diagonal) 1902 submatrix, and any entries in the o locations are stored in the 1903 o (off-diagonal) submatrix. Note that the d and the o submatrices are 1904 stored simply in the MATSEQBAIJ format for compressed row storage. 
1905 1906 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 1907 and o_nz should indicate the number of block nonzeros per row in the o matrix. 1908 In general, for PDE problems in which most nonzeros are near the diagonal, 1909 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1910 or you will get TERRIBLE performance; see the users' manual chapter on 1911 matrices. 1912 1913 Level: intermediate 1914 1915 .keywords: matrix, block, aij, compressed row, sparse, parallel 1916 1917 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIAIJ() 1918 @*/ 1919 int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N, 1920 int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 1921 { 1922 Mat B; 1923 Mat_MPIBAIJ *b; 1924 int ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size,flg; 1925 int flag1 = 0,flag2 = 0; 1926 1927 PetscFunctionBegin; 1928 if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid block size specified, must be positive"); 1929 1930 MPI_Comm_size(comm,&size); 1931 ierr = OptionsHasName(PETSC_NULL,"-mat_mpibaij",&flag1); CHKERRQ(ierr); 1932 ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2); CHKERRQ(ierr); 1933 if (!flag1 && !flag2 && size == 1) { 1934 if (M == PETSC_DECIDE) M = m; 1935 if (N == PETSC_DECIDE) N = n; 1936 ierr = MatCreateSeqBAIJ(comm,bs,M,N,d_nz,d_nnz,A); CHKERRQ(ierr); 1937 PetscFunctionReturn(0); 1938 } 1939 1940 *A = 0; 1941 PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",comm,MatDestroy,MatView); 1942 PLogObjectCreate(B); 1943 B->data = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b); 1944 PetscMemzero(b,sizeof(Mat_MPIBAIJ)); 1945 PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps)); 1946 1947 B->ops->destroy = MatDestroy_MPIBAIJ; 1948 B->ops->view = MatView_MPIBAIJ; 1949 B->mapping = 0; 1950 B->factor = 0; 1951 B->assembled = PETSC_FALSE; 1952 1953 B->insertmode = NOT_SET_VALUES; 1954 MPI_Comm_rank(comm,&b->rank); 1955 
MPI_Comm_size(comm,&b->size); 1956 1957 if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) { 1958 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 1959 } 1960 if ( M == PETSC_DECIDE && m == PETSC_DECIDE) { 1961 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either M or m should be specified"); 1962 } 1963 if ( N == PETSC_DECIDE && n == PETSC_DECIDE) { 1964 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either N or n should be specified"); 1965 } 1966 if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE; 1967 if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE; 1968 1969 if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 1970 work[0] = m; work[1] = n; 1971 mbs = m/bs; nbs = n/bs; 1972 ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr); 1973 if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;} 1974 if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;} 1975 } 1976 if (m == PETSC_DECIDE) { 1977 Mbs = M/bs; 1978 if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global rows must be divisible by blocksize"); 1979 mbs = Mbs/b->size + ((Mbs % b->size) > b->rank); 1980 m = mbs*bs; 1981 } 1982 if (n == PETSC_DECIDE) { 1983 Nbs = N/bs; 1984 if (Nbs*bs != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global cols must be divisible by blocksize"); 1985 nbs = Nbs/b->size + ((Nbs % b->size) > b->rank); 1986 n = nbs*bs; 1987 } 1988 if (mbs*bs != m || nbs*bs != n) { 1989 SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of local rows, cols must be divisible by blocksize"); 1990 } 1991 1992 b->m = m; B->m = m; 1993 b->n = n; B->n = n; 1994 b->N = N; B->N = N; 1995 b->M = M; B->M = M; 1996 b->bs = bs; 1997 b->bs2 = bs*bs; 1998 b->mbs = mbs; 1999 b->nbs = nbs; 2000 b->Mbs = Mbs; 2001 b->Nbs = Nbs; 2002 2003 /* the information in the maps duplicates the information computed below, eventually 2004 we should remove the duplicate information that is not contained in the maps */ 2005 ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr); 2006 ierr = 
MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr); 2007 2008 /* build local table of row and column ownerships */ 2009 b->rowners = (int *) PetscMalloc(3*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners); 2010 PLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 2011 b->cowners = b->rowners + b->size + 2; 2012 b->rowners_bs = b->cowners + b->size + 2; 2013 ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2014 b->rowners[0] = 0; 2015 for ( i=2; i<=b->size; i++ ) { 2016 b->rowners[i] += b->rowners[i-1]; 2017 } 2018 for ( i=0; i<=b->size; i++ ) { 2019 b->rowners_bs[i] = b->rowners[i]*bs; 2020 } 2021 b->rstart = b->rowners[b->rank]; 2022 b->rend = b->rowners[b->rank+1]; 2023 b->rstart_bs = b->rstart * bs; 2024 b->rend_bs = b->rend * bs; 2025 2026 ierr = MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2027 b->cowners[0] = 0; 2028 for ( i=2; i<=b->size; i++ ) { 2029 b->cowners[i] += b->cowners[i-1]; 2030 } 2031 b->cstart = b->cowners[b->rank]; 2032 b->cend = b->cowners[b->rank+1]; 2033 b->cstart_bs = b->cstart * bs; 2034 b->cend_bs = b->cend * bs; 2035 2036 2037 if (d_nz == PETSC_DEFAULT) d_nz = 5; 2038 ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr); 2039 PLogObjectParent(B,b->A); 2040 if (o_nz == PETSC_DEFAULT) o_nz = 0; 2041 ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr); 2042 PLogObjectParent(B,b->B); 2043 2044 /* build cache for off array entries formed */ 2045 ierr = StashCreate_Private(comm,1,&b->stash); CHKERRQ(ierr); 2046 ierr = StashCreate_Private(comm,bs,&b->bstash); CHKERRQ(ierr); 2047 b->donotstash = 0; 2048 b->colmap = 0; 2049 b->garray = 0; 2050 b->roworiented = 1; 2051 2052 /* stuff used in block assembly */ 2053 b->barray = 0; 2054 2055 /* stuff used for matrix vector multiply */ 2056 b->lvec = 0; 2057 b->Mvctx = 0; 2058 2059 /* stuff for MatGetRow() */ 2060 b->rowindices = 0; 2061 b->rowvalues = 0; 2062 
b->getrowactive = PETSC_FALSE; 2063 2064 /* hash table stuff */ 2065 b->ht = 0; 2066 b->hd = 0; 2067 b->ht_size = 0; 2068 b->ht_flag = 0; 2069 b->ht_fact = 0; 2070 b->ht_total_ct = 0; 2071 b->ht_insert_ct = 0; 2072 2073 *A = B; 2074 ierr = OptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg); CHKERRQ(ierr); 2075 if (flg) { 2076 double fact = 1.39; 2077 ierr = MatSetOption(B,MAT_USE_HASH_TABLE); CHKERRQ(ierr); 2078 ierr = OptionsGetDouble(PETSC_NULL,"-mat_use_hash_table",&fact,&flg); CHKERRQ(ierr); 2079 if (fact <= 1.0) fact = 1.39; 2080 ierr = MatMPIBAIJSetHashTableFactor(B,fact); CHKERRQ(ierr); 2081 PLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact); 2082 } 2083 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C", 2084 "MatGetDiagonalBlock_MPIBAIJ", 2085 (void*)MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 2086 PetscFunctionReturn(0); 2087 } 2088 2089 #undef __FUNC__ 2090 #define __FUNC__ "MatDuplicate_MPIBAIJ" 2091 static int MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2092 { 2093 Mat mat; 2094 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data; 2095 int ierr, len=0, flg; 2096 2097 PetscFunctionBegin; 2098 *newmat = 0; 2099 PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",matin->comm,MatDestroy,MatView); 2100 PLogObjectCreate(mat); 2101 mat->data = (void *) (a = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(a); 2102 PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps)); 2103 mat->ops->destroy = MatDestroy_MPIBAIJ; 2104 mat->ops->view = MatView_MPIBAIJ; 2105 mat->factor = matin->factor; 2106 mat->assembled = PETSC_TRUE; 2107 mat->insertmode = NOT_SET_VALUES; 2108 2109 a->m = mat->m = oldmat->m; 2110 a->n = mat->n = oldmat->n; 2111 a->M = mat->M = oldmat->M; 2112 a->N = mat->N = oldmat->N; 2113 2114 a->bs = oldmat->bs; 2115 a->bs2 = oldmat->bs2; 2116 a->mbs = oldmat->mbs; 2117 a->nbs = oldmat->nbs; 2118 a->Mbs = oldmat->Mbs; 2119 a->Nbs = oldmat->Nbs; 2120 2121 a->rstart = 
oldmat->rstart; 2122 a->rend = oldmat->rend; 2123 a->cstart = oldmat->cstart; 2124 a->cend = oldmat->cend; 2125 a->size = oldmat->size; 2126 a->rank = oldmat->rank; 2127 a->donotstash = oldmat->donotstash; 2128 a->roworiented = oldmat->roworiented; 2129 a->rowindices = 0; 2130 a->rowvalues = 0; 2131 a->getrowactive = PETSC_FALSE; 2132 a->barray = 0; 2133 a->rstart_bs = oldmat->rstart_bs; 2134 a->rend_bs = oldmat->rend_bs; 2135 a->cstart_bs = oldmat->cstart_bs; 2136 a->cend_bs = oldmat->cend_bs; 2137 2138 /* hash table stuff */ 2139 a->ht = 0; 2140 a->hd = 0; 2141 a->ht_size = 0; 2142 a->ht_flag = oldmat->ht_flag; 2143 a->ht_fact = oldmat->ht_fact; 2144 a->ht_total_ct = 0; 2145 a->ht_insert_ct = 0; 2146 2147 2148 a->rowners = (int *) PetscMalloc(3*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 2149 PLogObjectMemory(mat,3*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 2150 a->cowners = a->rowners + a->size + 2; 2151 a->rowners_bs = a->cowners + a->size + 2; 2152 PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int)); 2153 ierr = StashCreate_Private(matin->comm,1,&a->stash); CHKERRQ(ierr); 2154 ierr = StashCreate_Private(matin->comm,oldmat->bs,&a->bstash); CHKERRQ(ierr); 2155 if (oldmat->colmap) { 2156 #if defined (USE_CTABLE) 2157 ierr = TableCreateCopy(oldmat->colmap,&a->colmap); CHKERRQ(ierr); 2158 #else 2159 a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap); 2160 PLogObjectMemory(mat,(a->Nbs)*sizeof(int)); 2161 PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int)); 2162 #endif 2163 } else a->colmap = 0; 2164 if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) { 2165 a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray); 2166 PLogObjectMemory(mat,len*sizeof(int)); 2167 PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); 2168 } else a->garray = 0; 2169 2170 ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 2171 PLogObjectParent(mat,a->lvec); 2172 ierr = 
VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 2173 2174 PLogObjectParent(mat,a->Mvctx); 2175 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A); CHKERRQ(ierr); 2176 PLogObjectParent(mat,a->A); 2177 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B); CHKERRQ(ierr); 2178 PLogObjectParent(mat,a->B); 2179 ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr); 2180 ierr = FListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr); 2181 if (flg) { 2182 ierr = MatPrintHelp(mat); CHKERRQ(ierr); 2183 } 2184 *newmat = mat; 2185 PetscFunctionReturn(0); 2186 } 2187 2188 #include "sys.h" 2189 2190 #undef __FUNC__ 2191 #define __FUNC__ "MatLoad_MPIBAIJ" 2192 int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat) 2193 { 2194 Mat A; 2195 int i, nz, ierr, j,rstart, rend, fd; 2196 Scalar *vals,*buf; 2197 MPI_Comm comm = ((PetscObject)viewer)->comm; 2198 MPI_Status status; 2199 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 2200 int *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf; 2201 int flg,tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows; 2202 int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2203 int dcount,kmax,k,nzcount,tmp; 2204 2205 PetscFunctionBegin; 2206 ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr); 2207 2208 MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 2209 if (!rank) { 2210 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 2211 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr); 2212 if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object"); 2213 if (header[3] < 0) { 2214 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as MPIBAIJ"); 2215 } 2216 } 2217 2218 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 2219 M = header[1]; N = header[2]; 2220 2221 if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices"); 2222 2223 /* 2224 This code adds extra rows to 
make sure the number of rows is 2225 divisible by the blocksize 2226 */ 2227 Mbs = M/bs; 2228 extra_rows = bs - M + bs*(Mbs); 2229 if (extra_rows == bs) extra_rows = 0; 2230 else Mbs++; 2231 if (extra_rows &&!rank) { 2232 PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n"); 2233 } 2234 2235 /* determine ownership of all rows */ 2236 mbs = Mbs/size + ((Mbs % size) > rank); 2237 m = mbs * bs; 2238 rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int)); CHKPTRQ(rowners); 2239 browners = rowners + size + 1; 2240 ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2241 rowners[0] = 0; 2242 for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1]; 2243 for ( i=0; i<=size; i++ ) browners[i] = rowners[i]*bs; 2244 rstart = rowners[rank]; 2245 rend = rowners[rank+1]; 2246 2247 /* distribute row lengths to all processors */ 2248 locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) ); CHKPTRQ(locrowlens); 2249 if (!rank) { 2250 rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) ); CHKPTRQ(rowlengths); 2251 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr); 2252 for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 2253 sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 2254 for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i]; 2255 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2256 PetscFree(sndcounts); 2257 } else { 2258 ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm);CHKERRQ(ierr); 2259 } 2260 2261 if (!rank) { 2262 /* calculate the number of nonzeros on each processor */ 2263 procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 2264 PetscMemzero(procsnz,size*sizeof(int)); 2265 for ( i=0; i<size; i++ ) { 2266 for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) { 2267 procsnz[i] += rowlengths[j]; 2268 } 2269 } 2270 PetscFree(rowlengths); 2271 2272 /* 
determine max buffer needed and allocate it */ 2273 maxnz = 0; 2274 for ( i=0; i<size; i++ ) { 2275 maxnz = PetscMax(maxnz,procsnz[i]); 2276 } 2277 cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 2278 2279 /* read in my part of the matrix column indices */ 2280 nz = procsnz[0]; 2281 ibuf = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf); 2282 mycols = ibuf; 2283 if (size == 1) nz -= extra_rows; 2284 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr); 2285 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2286 2287 /* read in every ones (except the last) and ship off */ 2288 for ( i=1; i<size-1; i++ ) { 2289 nz = procsnz[i]; 2290 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr); 2291 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 2292 } 2293 /* read in the stuff for the last proc */ 2294 if ( size != 1 ) { 2295 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2296 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr); 2297 for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i; 2298 ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr); 2299 } 2300 PetscFree(cols); 2301 } else { 2302 /* determine buffer space needed for message */ 2303 nz = 0; 2304 for ( i=0; i<m; i++ ) { 2305 nz += locrowlens[i]; 2306 } 2307 ibuf = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf); 2308 mycols = ibuf; 2309 /* receive message of column indices*/ 2310 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2311 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2312 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 2313 } 2314 2315 /* loop over local rows, determining number of off diagonal entries */ 2316 dlens = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) ); CHKPTRQ(dlens); 2317 odlens = dlens + (rend-rstart); 2318 mask = (int *) PetscMalloc( 3*Mbs*sizeof(int) ); CHKPTRQ(mask); 2319 
PetscMemzero(mask,3*Mbs*sizeof(int)); 2320 masked1 = mask + Mbs; 2321 masked2 = masked1 + Mbs; 2322 rowcount = 0; nzcount = 0; 2323 for ( i=0; i<mbs; i++ ) { 2324 dcount = 0; 2325 odcount = 0; 2326 for ( j=0; j<bs; j++ ) { 2327 kmax = locrowlens[rowcount]; 2328 for ( k=0; k<kmax; k++ ) { 2329 tmp = mycols[nzcount++]/bs; 2330 if (!mask[tmp]) { 2331 mask[tmp] = 1; 2332 if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp; 2333 else masked1[dcount++] = tmp; 2334 } 2335 } 2336 rowcount++; 2337 } 2338 2339 dlens[i] = dcount; 2340 odlens[i] = odcount; 2341 2342 /* zero out the mask elements we set */ 2343 for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0; 2344 for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0; 2345 } 2346 2347 /* create our matrix */ 2348 ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat); 2349 CHKERRQ(ierr); 2350 A = *newmat; 2351 MatSetOption(A,MAT_COLUMNS_SORTED); 2352 2353 if (!rank) { 2354 buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(buf); 2355 /* read in my part of the matrix numerical values */ 2356 nz = procsnz[0]; 2357 vals = buf; 2358 mycols = ibuf; 2359 if (size == 1) nz -= extra_rows; 2360 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 2361 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2362 2363 /* insert into matrix */ 2364 jj = rstart*bs; 2365 for ( i=0; i<m; i++ ) { 2366 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2367 mycols += locrowlens[i]; 2368 vals += locrowlens[i]; 2369 jj++; 2370 } 2371 /* read in other processors (except the last one) and ship out */ 2372 for ( i=1; i<size-1; i++ ) { 2373 nz = procsnz[i]; 2374 vals = buf; 2375 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 2376 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2377 } 2378 /* the last proc */ 2379 if ( size != 1 ){ 2380 nz = procsnz[i] - extra_rows; 2381 vals = buf; 2382 ierr = 
PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 2383 for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0; 2384 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 2385 } 2386 PetscFree(procsnz); 2387 } else { 2388 /* receive numeric values */ 2389 buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(buf); 2390 2391 /* receive message of values*/ 2392 vals = buf; 2393 mycols = ibuf; 2394 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2395 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2396 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 2397 2398 /* insert into matrix */ 2399 jj = rstart*bs; 2400 for ( i=0; i<m; i++ ) { 2401 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2402 mycols += locrowlens[i]; 2403 vals += locrowlens[i]; 2404 jj++; 2405 } 2406 } 2407 PetscFree(locrowlens); 2408 PetscFree(buf); 2409 PetscFree(ibuf); 2410 PetscFree(rowners); 2411 PetscFree(dlens); 2412 PetscFree(mask); 2413 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 2414 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 2415 PetscFunctionReturn(0); 2416 } 2417 2418 2419 2420 #undef __FUNC__ 2421 #define __FUNC__ "MatMPIBAIJSetHashTableFactor" 2422 /*@ 2423 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2424 2425 Input Parameters: 2426 . mat - the matrix 2427 . 
fact - factor 2428 2429 Collective on Mat 2430 2431 Level: advanced 2432 2433 Notes: 2434 This can also be set by the command line option: -mat_use_hash_table fact 2435 2436 .keywords: matrix, hashtable, factor, HT 2437 2438 .seealso: MatSetOption() 2439 @*/ 2440 int MatMPIBAIJSetHashTableFactor(Mat mat,double fact) 2441 { 2442 Mat_MPIBAIJ *baij; 2443 2444 PetscFunctionBegin; 2445 PetscValidHeaderSpecific(mat,MAT_COOKIE); 2446 if (mat->type != MATMPIBAIJ) { 2447 SETERRQ(PETSC_ERR_ARG_WRONG,1,"Incorrect matrix type. Use MPIBAIJ only."); 2448 } 2449 baij = (Mat_MPIBAIJ*) mat->data; 2450 baij->ht_fact = fact; 2451 PetscFunctionReturn(0); 2452 } 2453