#ifdef PETSC_RCS_HEADER
static char vcid[] = "$Id: mpibaij.c,v 1.135 1998/08/25 19:52:14 bsmith Exp bsmith $";
#endif

#include "pinclude/pviewer.h"   /*I "mat.h" I*/
#include "src/mat/impls/baij/mpi/mpibaij.h"
#include "src/vec/vecimpl.h"


/* Routines defined elsewhere in the MPIBAIJ implementation */
extern int MatSetUpMultiply_MPIBAIJ(Mat);
extern int DisAssemble_MPIBAIJ(Mat);
extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int);
extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatGetSubMatrixCall,Mat **);
extern int MatGetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *);

/*
     Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non scable way since the
   length of colmap equals the global matrix length.

     colmap[global block column] = local point offset + 1 (0 means "column
   not present in the off-diagonal part B"); garray[] lists, per local block
   column of B, the corresponding global block column.
*/
#undef __FUNC__
#define __FUNC__ "CreateColmap_MPIBAIJ_Private"
static int CreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data;
  int         nbs = B->nbs,i,bs=B->bs;;

  PetscFunctionBegin;
  /* one entry per global block column, zero-initialized so 0 == "absent" */
  baij->colmap = (int *) PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap);
  PLogObjectMemory(mat,baij->Nbs*sizeof(int));
  PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));
  /* store the local POINT offset plus one (i*bs+1), not the block index */
  for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1;
  PetscFunctionReturn(0);
}

#define CHUNKSIZE  10

/*
     Inserts one point value into the "diagonal" sequential block A.
   Expands out MatSetValues_SeqBAIJ inline for speed; relies on the caller
   (MatSetValues_MPIBAIJ) declaring the working variables rp,ap,bap,brow,
   bcol,ridx,cidx,low,high,t,_i,ii,nrow,rmax,N and the cached arrays
   aa,ai,aj,aimax,ailen taken from a = (Mat_SeqBAIJ*)A->data.
     Search is a truncated binary search followed by a linear scan of the
   (sorted) block-column indices of block row `brow`.  If the block is not
   present and insertion is allowed, the row is grown by CHUNKSIZE blocks
   when full (all three arrays are reallocated as one single malloc).
     NOTE(review): a->nz++ happens only inside the reallocation branch,
   i.e. not on every newly inserted block -- looks inconsistent; verify
   whether nz is recomputed from ilen[] at assembly time.
*/
#define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    /* truncated binary search for block column bcol */ \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for ( _i=low; _i<high; _i++ ) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        /* block already exists: update the (ridx,cidx) entry in place */ \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else                    *bap  = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
      Scalar *new_a; \
 \
      if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
 \
      /* malloc new storage space: one allocation holds a, j and i */ \
      len   = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); \
      new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
      new_j = (int *) (new_a + bs2*new_nz); \
      new_i = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} \
      for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
      PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); \
      len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
      PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, \
                  len*sizeof(int)); \
      PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); \
      PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \
      PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
                  aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar)); \
      /* free up old matrix storage */ \
      PetscFree(a->a); \
      if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
      aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \
      a->singlemalloc = 1; \
 \
      rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
      rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
      PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
      a->maxnz += bs2*CHUNKSIZE; \
      a->reallocs++; \
      a->nz++; \
    } \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    for ( ii=N; ii>=_i; ii-- ) { \
      rp[ii+1] = rp[ii]; \
      PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \
    } \
    if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar)); \
    rp[_i]                      = bcol; \
    ap[bs2*_i + bs*cidx + ridx] = value; \
a_noinsert:; \
    ailen[brow] = nrow; \
}

/*
     Identical to MatSetValues_SeqBAIJ_A_Private but operating on the
   "off-diagonal" sequential block B (arrays ba,bi,bj,bimax,bilen and
   struct b).  Kept as a textual twin of the A macro; any fix applied to
   one should be mirrored in the other.
     NOTE(review): b->nz++ has the same placement question as a->nz++.
*/
#define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for ( _i=low; _i<high; _i++ ) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else                    *bap  = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int    new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
      Scalar *new_a; \
 \
      if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
 \
      /* malloc new storage space */ \
      len   = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(b->mbs+1)*sizeof(int); \
      new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
      new_j = (int *) (new_a + bs2*new_nz); \
      new_i = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = bi[ii];} \
      for ( ii=brow+1; ii<b->mbs+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
      PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int)); \
      len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
      PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow, \
                  len*sizeof(int)); \
      PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(Scalar)); \
      PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \
      PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
                  ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(Scalar)); \
      /* free up old matrix storage */ \
      PetscFree(b->a); \
      if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \
      ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \
      b->singlemalloc = 1; \
 \
      rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
      rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
      PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
      b->maxnz += bs2*CHUNKSIZE; \
      b->reallocs++; \
      b->nz++; \
    } \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    for ( ii=N; ii>=_i; ii-- ) { \
      rp[ii+1] = rp[ii]; \
      PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \
    } \
    if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar)); \
    rp[_i]                      = bcol; \
    ap[bs2*_i + bs*cidx + ridx] = value; \
b_noinsert:; \
    bilen[brow] = nrow; \
}

/*
     MatSetValues for the parallel BAIJ format, point-index interface.
   Rows owned locally are routed to the diagonal block A or the
   off-diagonal block B (via the inline macros above); rows owned by
   other processors are stashed for communication at assembly time.
*/
#undef __FUNC__
#define __FUNC__ "MatSetValues_MPIBAIJ"
int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  Scalar      value;
  int         ierr,i,j,row,col;
  int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
  int         rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
  int         cend_orig=baij->cend_bs,bs=baij->bs;

  /* Some Variables required in the macro */
  Mat         A = baij->A;
  Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) (A)->data;
  int         *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  Scalar      *aa=a->a;

  Mat         B = baij->B;
  Mat_SeqBAIJ *b = (Mat_SeqBAIJ *) (B)->data;
  int         *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  Scalar      *ba=b->a;

  int         *rp,ii,nrow,_i,rmax,N,brow,bcol;
  int         low,high,t,ridx,cidx,bs2=a->bs2;
  Scalar      *ap,*bap;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
#if defined(USE_PETSC_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      /* locally owned row */
      row = im[i] - rstart_orig;
      for ( j=0; j<n; j++ ) {
        if (in[j] >= cstart_orig && in[j] < cend_orig){
          /* column in the diagonal portion -> A */
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
#if defined(USE_PETSC_BOPT_g)
        else if (in[j] < 0) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");}
        else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Col too large");}
#endif
        else {
          /* off-diagonal column -> B; once assembled we must translate the
             global column through colmap (and possibly disassemble when a
             brand-new off-diagonal column appears) */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
            col = baij->colmap[in[j]/bs] - 1 + in[j]%bs;
            /* NOTE(review): when the block column is absent, colmap[] is 0 so
               col = in[j]%bs - 1, which is only negative for in[j]%bs == 0;
               the DisAssemble test below may be skipped for other point
               columns of an absent block -- verify against later PETSc fix */
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ *) (B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba=b->a;
            }
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      /* row owned by another processor: stash for AssemblyBegin/End */
      if (roworiented && !baij->donotstash) {
        ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
      } else {
        if (!baij->donotstash) {
          row = im[i];
          for ( j=0; j<n; j++ ) {
            ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
          }
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);

/*
     MatSetValues for the parallel BAIJ format, BLOCK-index interface:
   im/in are block row/column indices and v holds m*n dense bs x bs blocks.
   Each block is copied (if necessary) into the contiguous scratch buffer
   baij->barray in column-major order before being handed to the
   sequential blocked insertion routine; off-processor rows are stashed
   point-by-point.
     NOTE(review): unlike the other routines in this file there is no
   PetscFunctionBegin here -- confirm whether that is intentional.
*/
#undef __FUNC__
#define __FUNC__ "MatSetValuesBlocked_MPIBAIJ"
int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  Scalar      *value,*barray=baij->barray;
  int         ierr,i,j,ii,jj,row,col,k,l;
  int         roworiented = baij->roworiented,rstart=baij->rstart ;
  int         rend=baij->rend,cstart=baij->cstart,stepval;
  int         cend=baij->cend,bs=baij->bs,bs2=baij->bs2;

  /* lazily allocate the bs x bs scratch block, reused across calls */
  if(!barray) {
    baij->barray = barray = (Scalar*) PetscMalloc(bs2*sizeof(Scalar)); CHKPTRQ(barray);
  }

  /* stride (in Scalars) from the end of one block row of v to the start
     of the next, for the dense input layout */
  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for ( i=0; i<m; i++ ) {
#if defined(USE_PETSC_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for ( j=0; j<n; j++ ) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = v + i*bs2;
        } else if((!roworiented) && (m == 1)) {
          barray = v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for ( ii=0; ii<bs; ii++,value+=stepval ) {
            for (jj=0; jj<bs; jj++ ) {
              *barray++ = *value++;
            }
          }
          barray -=bs2; /* rewind to the start of the copied block */
        }

        if (in[j] >= cstart && in[j] < cend){
          /* block column in the diagonal portion -> A */
          col = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
#if defined(USE_PETSC_BOPT_g)
        else if (in[j] < 0) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");}
        else if (in[j] >= baij->Nbs) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");}
#endif
        else {
          /* off-diagonal block column -> B, translated through colmap
             once the matrix has been assembled */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(USE_PETSC_BOPT_g)
            /* colmap stores point offsets; a block start must be divisible by bs */
            if ((baij->colmap[in[j]] - 1) % bs) {SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");}
#endif
            col = (baij->colmap[in[j]] - 1)/bs;
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
              col = in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      /* off-processor block row: stash the individual point values */
      if (!baij->donotstash) {
        if (roworiented ) {
          row = im[i]*bs;
          value = v + i*(stepval+bs)*bs;
          for ( j=0; j<bs; j++,row++ ) {
            for ( k=0; k<n; k++ ) {
              for ( col=in[k]*bs,l=0; l<bs; l++,col++) {
                ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr);
              }
            }
          }
        } else {
          for ( j=0; j<n; j++ ) {
            value = v + j*(stepval+bs)*bs + i*bs;
            col   = in[j]*bs;
            for ( k=0; k<bs; k++,col++,value+=stepval) {
              for ( row = im[i]*bs,l=0; l<bs; l++,row++) {
                ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr);
              }
            }
          }
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
#include <math.h>
/* Multiplicative hashing with the golden-ratio constant: maps key to a
   slot in [0,size) using the fractional part of key*HASH_KEY */
#define HASH_KEY 0.6180339887
/* #define HASH1(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
/* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */

/*
     Hash-table based MatSetValues (point interface), installed by
   MatAssemblyEnd once the nonzero pattern is frozen: each (block row,
   block col) pair is located through baij->ht/baij->hd instead of
   searching the row, so no new nonzeros can be introduced.
*/
#undef __FUNC__
#define __FUNC__ "MatSetValues_MPIBAIJ_HT"
int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr,i,j,row,col;
  int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
  int         rend_orig=baij->rend_bs,Nbs=baij->Nbs;
  int         h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx;
  double      tmp;
  Scalar      ** HD = baij->hd,value;
#if defined(USE_PETSC_BOPT_g)
  int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  for ( i=0; i<m; i++ ) {
#if defined(USE_PETSC_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for ( j=0; j<n; j++ ) {
        col = in[j];
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        /* Look up into the Hash Table: key encodes the GLOBAL block
           coordinates, +1 so that 0 can mean "empty slot" */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);


        idx = h1;
#if defined(USE_PETSC_BOPT_g)
        /* debug build also keeps probe-count statistics */
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          /* linear probing: scan forward to the end, then wrap to h1 */
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#else
        if (HT[idx] != key) {
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#endif
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else {
      /* off-processor row: stash as in MatSetValues_MPIBAIJ */
      if (roworiented && !baij->donotstash) {
        ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
      } else {
        if (!baij->donotstash) {
          row = im[i];
          for ( j=0; j<n; j++ ) {
            ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
          }
        }
      }
    }
  }
#if defined(USE_PETSC_BOPT_g)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

/*
     Hash-table based MatSetValues, BLOCK interface: im/in are block
   indices and v holds dense bs x bs blocks.  HD[idx] points directly at
   the stored block, so values are accumulated in place without the
   barray copy used by MatSetValuesBlocked_MPIBAIJ.
*/
#undef __FUNC__
#define __FUNC__ "MatSetValuesBlocked_MPIBAIJ_HT"
int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr,i,j,ii,jj,row,col,k,l;
  int         roworiented = baij->roworiented,rstart=baij->rstart ;
  int         rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2;
  int         h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  double      tmp;
  Scalar      ** HD = baij->hd,*value,*v_t,*baij_a;
#if defined(USE_PETSC_BOPT_g)
  int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for ( i=0; i<m; i++ ) {
#if defined(USE_PETSC_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    row = im[i];
    v_t = v + i*bs2;
    if (row >= rstart && row < rend) {
      for ( j=0; j<n; j++ ) {
        col = in[j];

        /* Look up into the Hash Table (block coordinates, +1 as above) */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(USE_PETSC_BOPT_g)
        total_ct++;
        insert_ct++;
        if (HT[idx] != key) {
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#else
        if (HT[idx] != key) {
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#endif
        baij_a = HD[idx]; /* direct pointer to the stored bs x bs block */
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          /* input block is row-major; stored block is column-major, hence
             the jj stride of bs when scattering each input row ii */
          if (addv == ADD_VALUES) {
            for ( ii=0; ii<bs; ii++,value+=stepval) {
              for ( jj=ii; jj<bs2; jj+=bs ) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for ( ii=0; ii<bs; ii++,value+=stepval) {
              for ( jj=ii; jj<bs2; jj+=bs ) {
                baij_a[jj] = *value++;
              }
            }
          }
        } else {
          /* column-oriented input: copy block column by column */
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) {
              for ( jj=0; jj<bs; jj++ ) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) {
              for ( jj=0; jj<bs; jj++ ) {
                baij_a[jj] = *value++;
              }
            }
          }
        }
      }
    } else {
      /* off-processor block row: stash point values */
      if (!baij->donotstash) {
        if (roworiented ) {
          row = im[i]*bs;
          value = v + i*(stepval+bs)*bs;
          for ( j=0; j<bs; j++,row++ ) {
            for ( k=0; k<n; k++ ) {
              for ( col=in[k]*bs,l=0; l<bs; l++,col++) {
                ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr);
              }
            }
          }
        } else {
          for ( j=0; j<n; j++ ) {
            value = v + j*(stepval+bs)*bs + i*bs;
            col   = in[j]*bs;
            for ( k=0; k<bs; k++,col++,value+=stepval) {
              for ( row = im[i]*bs,l=0; l<bs; l++,row++) {
                ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr);
              }
            }
          }
        }
      }
    }
  }
#if defined(USE_PETSC_BOPT_g)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

/*
     MatGetValues for the parallel BAIJ format (point indices).  Only
   locally owned rows may be queried; off-diagonal columns are translated
   through colmap/garray, and absent entries yield 0.0.
*/
#undef __FUNC__
#define __FUNC__ "MatGetValues_MPIBAIJ"
int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs;
  int         bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
        if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
        if (idxn[j] >= bscstart && idxn[j] < bscend){
          /* diagonal portion -> A */
          col = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
          /* absent column (colmap==0) or stale colmap entry -> value is 0 */
          if((baij->colmap[idxn[j]/bs]-1 < 0) ||
             (baij->garray[(baij->colmap[idxn[j]/bs]-1)/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = (baij->colmap[idxn[j]/bs]-1) + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

/*
     MatNorm for the parallel BAIJ format.  Uniprocessor case delegates to
   the sequential A; in parallel only the Frobenius norm is implemented
   (sum of squares over A and B, reduced with MPI_Allreduce).
*/
#undef __FUNC__
#define __FUNC__ "MatNorm_MPIBAIJ"
int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data;
  int         ierr, i,bs2=baij->bs2;
  double      sum = 0.0;
  Scalar      *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,norm); CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz*bs2; i++ ) {
#if defined(USE_PETSC_COMPLEX)
        sum += PetscReal(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz*bs2; i++ ) {
#if defined(USE_PETSC_COMPLEX)
        sum += PetscReal(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr
 = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet");
    }
  }
  PetscFunctionReturn(0);
}

/*
     MatAssemblyBegin for the parallel BAIJ format: flushes the stash of
   off-processor values.  Determines the destination processor of each
   stashed (row,col,value) triple, posts nonblocking receives sized by the
   global maximum message length, and starts the sends; the matching waits
   and unpacking happen in MatAssemblyEnd_MPIBAIJ.
*/
#undef __FUNC__
#define __FUNC__ "MatAssemblyBegin_MPIBAIJ"
int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  MPI_Comm    comm = mat->comm;
  int         size = baij->size, *owners = baij->rowners,bs=baij->bs;
  int         rank = baij->rank,tag = mat->tag, *owner,*starts,count,ierr;
  MPI_Request *send_waits,*recv_waits;
  int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
  InsertMode  addv;
  Scalar      *rvalues,*svalues;

  PetscFunctionBegin;
  if (baij->donotstash) {
    /* nothing was stashed; record empty communication state for AssemblyEnd */
    baij->svalues    = 0; baij->rvalues    = 0;
    baij->nsends     = 0; baij->nrecvs     = 0;
    baij->send_waits = 0; baij->recv_waits = 0;
    baij->rmax       = 0;
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc( (baij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
  for ( i=0; i<baij->stash.n; i++ ) {
    idx = baij->stash.idx[i];
    /* owners[] holds block-row ownership ranges; scale by bs for point rows */
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; break;
      }
    }
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  ierr = MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
  nreceives = work[rank];
  ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
  nmax = work[rank];
  PetscFree(work);

  /* post receives:
       1) each message will consist of ordered pairs
     (global index,value) we store the global index as a double
     to simplify the message passing.
       2) since we don't know how long each individual message is we
     allocate the largest needed buffer for each receive. Potentially
     this is a lot of wasted space.


       This could be done better.
  */
  rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
  for ( i=0; i<nreceives; i++ ) {
    ierr = MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues = (Scalar *) PetscMalloc(3*(baij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  /* pack triples (row,col,value); starts[] is advanced as we pack */
  for ( i=0; i<baij->stash.n; i++ ) {
    svalues[3*starts[owner[i]]]       = (Scalar) baij->stash.idx[i];
    svalues[3*starts[owner[i]]+1]     = (Scalar) baij->stash.idy[i];
    svalues[3*(starts[owner[i]]++)+2] = baij->stash.array[i];
  }
  PetscFree(owner);
  /* rebuild starts[] (it was consumed while packing) */
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      ierr =
MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  PetscFree(starts); PetscFree(nprocs);

  /* Free cache space */
  PLogInfo(baij->A,"MatAssemblyBegin_MPIBAIJ:Number of off-processor values %d\n",baij->stash.n);
  ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr);

  /* save communication state for MatAssemblyEnd_MPIBAIJ */
  baij->svalues    = svalues;    baij->rvalues    = rvalues;
  baij->nsends     = nsends;     baij->nrecvs     = nreceives;
  baij->send_waits = send_waits; baij->recv_waits = recv_waits;
  baij->rmax       = nmax;

  PetscFunctionReturn(0);
}

/* 
  Creates the hash table, and sets the table 
  This table is created only once. 
  If new entried need to be added to the matrix
  then the hash table has to be destroyed and
  recreated.
*/
#undef __FUNC__
#define __FUNC__ "MatCreateHashTable_MPIBAIJ_Private"
int MatCreateHashTable_MPIBAIJ_Private(Mat mat,double factor)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  Mat         A = baij->A, B=baij->B;
  Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data, *b=(Mat_SeqBAIJ *)B->data;
  int         i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  int         size,bs2=baij->bs2,rstart=baij->rstart;
  int         cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  int         *HT,key;
  Scalar      **HD;
  double      tmp;
#if defined(USE_PETSC_BOPT_g)
  int         ct=0,max=0;
#endif

  PetscFunctionBegin;
  /* table size = factor * (number of stored blocks); factor > 1 keeps
     the load below 100% so linear probing terminates.
     NOTE(review): ht_size is overwritten even when the table already
     exists and we return early just below -- confirm this is harmless */
  baij->ht_size=(int)(factor*nz);
  size = baij->ht_size;

  if (baij->ht) {
    PetscFunctionReturn(0);
  }

  /* Allocate Memory for Hash Table: keys (HT) and data pointers (HD)
     live in one allocation, HT placed directly after HD */
  baij->hd = (Scalar**)PetscMalloc((size)*(sizeof(int)+sizeof(Scalar*))+1); CHKPTRQ(baij->hd);
  baij->ht = (int*)(baij->hd + size);
  HD = baij->hd;
  HT = baij->ht;


  PetscMemzero(HD,size*(sizeof(int)+sizeof(Scalar*)));


  /* Loop Over A */
  for ( i=0; i<a->mbs; i++ ) {
    for ( j=ai[i]; j<ai[i+1]; j++ ) {
      row = i+rstart;
      col = aj[j]+cstart;

      /* key = global block row * Nbs + global block col + 1 (0 == empty) */
      key = row*Nbs + col + 1;
      h1  = HASH(size,key,tmp);
      for ( k=0; k<size; k++ ){
        /* NOTE(review): HT[] is int; comparison against 0.0 works but
           should be against 0 */
        if (HT[(h1+k)%size] == 0.0) {
          HT[(h1+k)%size] = key;
          HD[(h1+k)%size] = a->a + j*bs2;
          break;
#if defined(USE_PETSC_BOPT_g)
        } else {
          ct++;
#endif
        }
      }
#if defined(USE_PETSC_BOPT_g)
      if (k> max) max = k;
#endif
    }
  }
  /* Loop Over B */
  for ( i=0; i<b->mbs; i++ ) {
    for ( j=bi[i]; j<bi[i+1]; j++ ) {
      row = i+rstart;
      col = garray[bj[j]]; /* translate local B column to global block column */
      key = row*Nbs + col + 1;
      h1  = HASH(size,key,tmp);
      for ( k=0; k<size; k++ ){
        if (HT[(h1+k)%size] == 0.0) {
          HT[(h1+k)%size] = key;
          HD[(h1+k)%size] = b->a + j*bs2;
          break;
#if defined(USE_PETSC_BOPT_g)
        } else {
          ct++;
#endif
        }
      }
#if defined(USE_PETSC_BOPT_g)
      if (k> max) max = k;
#endif
    }
  }

  /* Print Summary */
#if defined(USE_PETSC_BOPT_g)
  for ( i=0,j=0; i<size; i++)
    if (HT[i]) {j++;}
  PLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n",
           (j== 0)?
0.0:((double)(ct+j))/j,max);
#endif
  PetscFunctionReturn(0);
}

/*
     MatAssemblyEnd for the parallel BAIJ format: waits for the messages
   started in MatAssemblyBegin, unpacks the received (row,col,value)
   triples into A or B, assembles the two sequential blocks, handles
   collective disassembly, builds the multiply scatter on first final
   assembly, and optionally installs the hash-table MatSetValues variants.
*/
#undef __FUNC__
#define __FUNC__ "MatAssemblyEnd_MPIBAIJ"
int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  MPI_Status  *send_status,recv_status;
  int         imdex,nrecvs = baij->nrecvs, count = nrecvs, i, n, ierr;
  int         bs=baij->bs,row,col,other_disassembled;
  Scalar      *values,val;
  InsertMode  addv = mat->insertmode;

  PetscFunctionBegin;
  /*  wait on receives */
  while (count) {
    ierr = MPI_Waitany(nrecvs,baij->recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    values = baij->rvalues + 3*imdex*baij->rmax;
    ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,&n);CHKERRQ(ierr);
    n = n/3; /* each entry is a (row,col,value) triple of Scalars */
    for ( i=0; i<n; i++ ) {
      row = (int) PetscReal(values[3*i]) - baij->rstart*bs;
      col = (int) PetscReal(values[3*i+1]);
      val = values[3*i+2];
      if (col >= baij->cstart*bs && col < baij->cend*bs) {
        col -= baij->cstart*bs;
        ierr = MatSetValues(baij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr)
      } else {
        if (mat->was_assembled) {
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat); CHKERRQ(ierr);
          }
          /* NOTE(review): same colmap==0 concern as in MatSetValues_MPIBAIJ:
             col = col%bs - 1 is only negative for col%bs == 0 */
          col = (baij->colmap[col/bs]) - 1 + col%bs;
          if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
            ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
            col = (int) PetscReal(values[3*i+1]);
          }
        }
        ierr = MatSetValues(baij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr)
      }
    }
    count--;
  }
  if (baij->recv_waits) PetscFree(baij->recv_waits);
  if (baij->rvalues) PetscFree(baij->rvalues);

  /* wait on sends */
  if (baij->nsends) {
    send_status = (MPI_Status *) PetscMalloc(baij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    ierr        = MPI_Waitall(baij->nsends,baij->send_waits,send_status);CHKERRQ(ierr);
    PetscFree(send_status);
  }
  if (baij->send_waits) PetscFree(baij->send_waits);
  if (baij->svalues) PetscFree(baij->svalues);

  ierr = MatAssemblyBegin(baij->A,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode); CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must 
     also disassemble ourselfs, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*) baij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat); CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(baij->B,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode); CHKERRQ(ierr);

#if defined(USE_PETSC_BOPT_g)
  if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
    PLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",
             ((double)baij->ht_total_ct)/baij->ht_insert_ct);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  /* once the pattern is final, optionally switch to hash-table SetValues */
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact); CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;}
  PetscFunctionReturn(0);
}

/*
     Binary viewer for the parallel BAIJ format; currently delegates to
   the sequential A and therefore only supports one processor.
*/
#undef __FUNC__
#define __FUNC__ "MatView_MPIBAIJ_Binary"
static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
  }
else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported"); 979 PetscFunctionReturn(0); 980 } 981 982 #undef __FUNC__ 983 #define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworMatlab" 984 static int MatView_MPIBAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer) 985 { 986 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 987 int ierr, format,rank,bs = baij->bs; 988 FILE *fd; 989 ViewerType vtype; 990 991 PetscFunctionBegin; 992 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 993 if (vtype == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) { 994 ierr = ViewerGetFormat(viewer,&format); 995 if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 996 MatInfo info; 997 MPI_Comm_rank(mat->comm,&rank); 998 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 999 ierr = MatGetInfo(mat,MAT_LOCAL,&info); 1000 PetscSequentialPhaseBegin(mat->comm,1); 1001 fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n", 1002 rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs, 1003 baij->bs,(int)info.memory); 1004 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info); 1005 fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs); 1006 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info); 1007 fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs); 1008 fflush(fd); 1009 PetscSequentialPhaseEnd(mat->comm,1); 1010 ierr = VecScatterView(baij->Mvctx,viewer); CHKERRQ(ierr); 1011 PetscFunctionReturn(0); 1012 } else if (format == VIEWER_FORMAT_ASCII_INFO) { 1013 PetscPrintf(mat->comm," block size is %d\n",bs); 1014 PetscFunctionReturn(0); 1015 } 1016 } 1017 1018 if (vtype == DRAW_VIEWER) { 1019 Draw draw; 1020 PetscTruth isnull; 1021 ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr); 1022 ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1023 } 1024 1025 if (vtype == ASCII_FILE_VIEWER) { 1026 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 1027 PetscSequentialPhaseBegin(mat->comm,1); 1028 fprintf(fd,"[%d] rows %d starts %d 
ends %d cols %d starts %d ends %d\n", 1029 baij->rank,baij->m,baij->rstart*bs,baij->rend*bs,baij->n, 1030 baij->cstart*bs,baij->cend*bs); 1031 ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 1032 ierr = MatView(baij->B,viewer); CHKERRQ(ierr); 1033 fflush(fd); 1034 PetscSequentialPhaseEnd(mat->comm,1); 1035 } else { 1036 int size = baij->size; 1037 rank = baij->rank; 1038 if (size == 1) { 1039 ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 1040 } else { 1041 /* assemble the entire matrix onto first processor. */ 1042 Mat A; 1043 Mat_SeqBAIJ *Aloc; 1044 int M = baij->M, N = baij->N,*ai,*aj,col,i,j,k,*rvals; 1045 int mbs=baij->mbs; 1046 Scalar *a; 1047 1048 if (!rank) { 1049 ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 1050 } else { 1051 ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 1052 } 1053 PLogObjectParent(mat,A); 1054 1055 /* copy over the A part */ 1056 Aloc = (Mat_SeqBAIJ*) baij->A->data; 1057 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1058 rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals); 1059 1060 for ( i=0; i<mbs; i++ ) { 1061 rvals[0] = bs*(baij->rstart + i); 1062 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 1063 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1064 col = (baij->cstart+aj[j])*bs; 1065 for (k=0; k<bs; k++ ) { 1066 ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1067 col++; a += bs; 1068 } 1069 } 1070 } 1071 /* copy over the B part */ 1072 Aloc = (Mat_SeqBAIJ*) baij->B->data; 1073 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1074 for ( i=0; i<mbs; i++ ) { 1075 rvals[0] = bs*(baij->rstart + i); 1076 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 1077 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1078 col = baij->garray[aj[j]]*bs; 1079 for (k=0; k<bs; k++ ) { 1080 ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1081 col++; a += bs; 1082 } 1083 } 1084 } 1085 PetscFree(rvals); 1086 ierr = 
MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1087 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1088 /* 1089 Everyone has to call to draw the matrix since the graphics waits are 1090 synchronized across all processors that share the Draw object 1091 */ 1092 if (!rank || vtype == DRAW_VIEWER) { 1093 ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 1094 } 1095 ierr = MatDestroy(A); CHKERRQ(ierr); 1096 } 1097 } 1098 PetscFunctionReturn(0); 1099 } 1100 1101 1102 1103 #undef __FUNC__ 1104 #define __FUNC__ "MatView_MPIBAIJ" 1105 int MatView_MPIBAIJ(Mat mat,Viewer viewer) 1106 { 1107 int ierr; 1108 ViewerType vtype; 1109 1110 PetscFunctionBegin; 1111 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 1112 if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER || 1113 vtype == DRAW_VIEWER || vtype == MATLAB_VIEWER) { 1114 ierr = MatView_MPIBAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 1115 } else if (vtype == BINARY_FILE_VIEWER) { 1116 ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1117 } else { 1118 SETERRQ(1,1,"Viewer type not supported by PETSc object"); 1119 } 1120 PetscFunctionReturn(0); 1121 } 1122 1123 #undef __FUNC__ 1124 #define __FUNC__ "MatDestroy_MPIBAIJ" 1125 int MatDestroy_MPIBAIJ(Mat mat) 1126 { 1127 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1128 int ierr; 1129 1130 PetscFunctionBegin; 1131 if (--mat->refct > 0) PetscFunctionReturn(0); 1132 1133 if (mat->mapping) { 1134 ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr); 1135 } 1136 if (mat->bmapping) { 1137 ierr = ISLocalToGlobalMappingDestroy(mat->bmapping); CHKERRQ(ierr); 1138 } 1139 if (mat->rmap) { 1140 ierr = MapDestroy(mat->rmap);CHKERRQ(ierr); 1141 } 1142 if (mat->cmap) { 1143 ierr = MapDestroy(mat->cmap);CHKERRQ(ierr); 1144 } 1145 #if defined(USE_PETSC_LOG) 1146 PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",baij->M,baij->N); 1147 #endif 1148 1149 ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr); 
1150 PetscFree(baij->rowners); 1151 ierr = MatDestroy(baij->A); CHKERRQ(ierr); 1152 ierr = MatDestroy(baij->B); CHKERRQ(ierr); 1153 if (baij->colmap) PetscFree(baij->colmap); 1154 if (baij->garray) PetscFree(baij->garray); 1155 if (baij->lvec) VecDestroy(baij->lvec); 1156 if (baij->Mvctx) VecScatterDestroy(baij->Mvctx); 1157 if (baij->rowvalues) PetscFree(baij->rowvalues); 1158 if (baij->barray) PetscFree(baij->barray); 1159 if (baij->hd) PetscFree(baij->hd); 1160 PetscFree(baij); 1161 PLogObjectDestroy(mat); 1162 PetscHeaderDestroy(mat); 1163 PetscFunctionReturn(0); 1164 } 1165 1166 #undef __FUNC__ 1167 #define __FUNC__ "MatMult_MPIBAIJ" 1168 int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1169 { 1170 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1171 int ierr, nt; 1172 1173 PetscFunctionBegin; 1174 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1175 if (nt != a->n) { 1176 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx"); 1177 } 1178 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1179 if (nt != a->m) { 1180 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible parition of A and yy"); 1181 } 1182 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1183 ierr = (*a->A->ops->mult)(a->A,xx,yy); CHKERRQ(ierr); 1184 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1185 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 1186 ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1187 PetscFunctionReturn(0); 1188 } 1189 1190 #undef __FUNC__ 1191 #define __FUNC__ "MatMultAdd_MPIBAIJ" 1192 int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1193 { 1194 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1195 int ierr; 1196 1197 PetscFunctionBegin; 1198 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1199 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 1200 ierr = 
VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1201 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 1202 PetscFunctionReturn(0); 1203 } 1204 1205 #undef __FUNC__ 1206 #define __FUNC__ "MatMultTrans_MPIBAIJ" 1207 int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy) 1208 { 1209 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1210 int ierr; 1211 1212 PetscFunctionBegin; 1213 /* do nondiagonal part */ 1214 ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 1215 /* send it on its way */ 1216 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1217 /* do local part */ 1218 ierr = (*a->A->ops->multtrans)(a->A,xx,yy); CHKERRQ(ierr); 1219 /* receive remote parts: note this assumes the values are not actually */ 1220 /* inserted in yy until the next line, which is true for my implementation*/ 1221 /* but is not perhaps always true. */ 1222 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1223 PetscFunctionReturn(0); 1224 } 1225 1226 #undef __FUNC__ 1227 #define __FUNC__ "MatMultTransAdd_MPIBAIJ" 1228 int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1229 { 1230 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1231 int ierr; 1232 1233 PetscFunctionBegin; 1234 /* do nondiagonal part */ 1235 ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 1236 /* send it on its way */ 1237 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 1238 /* do local part */ 1239 ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 1240 /* receive remote parts: note this assumes the values are not actually */ 1241 /* inserted in yy until the next line, which is true for my implementation*/ 1242 /* but is not perhaps always true. 
*/ 1243 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 1244 PetscFunctionReturn(0); 1245 } 1246 1247 /* 1248 This only works correctly for square matrices where the subblock A->A is the 1249 diagonal block 1250 */ 1251 #undef __FUNC__ 1252 #define __FUNC__ "MatGetDiagonal_MPIBAIJ" 1253 int MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1254 { 1255 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1256 int ierr; 1257 1258 PetscFunctionBegin; 1259 if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block"); 1260 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1261 PetscFunctionReturn(0); 1262 } 1263 1264 #undef __FUNC__ 1265 #define __FUNC__ "MatScale_MPIBAIJ" 1266 int MatScale_MPIBAIJ(Scalar *aa,Mat A) 1267 { 1268 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1269 int ierr; 1270 1271 PetscFunctionBegin; 1272 ierr = MatScale(aa,a->A); CHKERRQ(ierr); 1273 ierr = MatScale(aa,a->B); CHKERRQ(ierr); 1274 PetscFunctionReturn(0); 1275 } 1276 1277 #undef __FUNC__ 1278 #define __FUNC__ "MatGetSize_MPIBAIJ" 1279 int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n) 1280 { 1281 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1282 1283 PetscFunctionBegin; 1284 if (m) *m = mat->M; 1285 if (n) *n = mat->N; 1286 PetscFunctionReturn(0); 1287 } 1288 1289 #undef __FUNC__ 1290 #define __FUNC__ "MatGetLocalSize_MPIBAIJ" 1291 int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n) 1292 { 1293 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1294 1295 PetscFunctionBegin; 1296 *m = mat->m; *n = mat->n; 1297 PetscFunctionReturn(0); 1298 } 1299 1300 #undef __FUNC__ 1301 #define __FUNC__ "MatGetOwnershipRange_MPIBAIJ" 1302 int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n) 1303 { 1304 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1305 1306 PetscFunctionBegin; 1307 *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs; 1308 PetscFunctionReturn(0); 1309 } 1310 1311 extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 1312 extern int 
MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 1313 1314 #undef __FUNC__ 1315 #define __FUNC__ "MatGetRow_MPIBAIJ" 1316 int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 1317 { 1318 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1319 Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 1320 int bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB; 1321 int nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs; 1322 int *cmap, *idx_p,cstart = mat->cstart; 1323 1324 PetscFunctionBegin; 1325 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active"); 1326 mat->getrowactive = PETSC_TRUE; 1327 1328 if (!mat->rowvalues && (idx || v)) { 1329 /* 1330 allocate enough space to hold information from the longest row. 1331 */ 1332 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data; 1333 int max = 1,mbs = mat->mbs,tmp; 1334 for ( i=0; i<mbs; i++ ) { 1335 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1336 if (max < tmp) { max = tmp; } 1337 } 1338 mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar))); 1339 CHKPTRQ(mat->rowvalues); 1340 mat->rowindices = (int *) (mat->rowvalues + max*bs2); 1341 } 1342 1343 if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,0,"Only local rows") 1344 lrow = row - brstart; 1345 1346 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1347 if (!v) {pvA = 0; pvB = 0;} 1348 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1349 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1350 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1351 nztot = nzA + nzB; 1352 1353 cmap = mat->garray; 1354 if (v || idx) { 1355 if (nztot) { 1356 /* Sort by increasing column numbers, assuming A and B already sorted */ 1357 int imark = -1; 1358 if (v) { 1359 *v = v_p = mat->rowvalues; 1360 for ( i=0; i<nzB; i++ ) { 1361 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1362 else break; 
1363 } 1364 imark = i; 1365 for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i]; 1366 for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = vworkB[i]; 1367 } 1368 if (idx) { 1369 *idx = idx_p = mat->rowindices; 1370 if (imark > -1) { 1371 for ( i=0; i<imark; i++ ) { 1372 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1373 } 1374 } else { 1375 for ( i=0; i<nzB; i++ ) { 1376 if (cmap[cworkB[i]/bs] < cstart) 1377 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1378 else break; 1379 } 1380 imark = i; 1381 } 1382 for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart*bs + cworkA[i]; 1383 for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1384 } 1385 } else { 1386 if (idx) *idx = 0; 1387 if (v) *v = 0; 1388 } 1389 } 1390 *nz = nztot; 1391 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1392 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1393 PetscFunctionReturn(0); 1394 } 1395 1396 #undef __FUNC__ 1397 #define __FUNC__ "MatRestoreRow_MPIBAIJ" 1398 int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1399 { 1400 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1401 1402 PetscFunctionBegin; 1403 if (baij->getrowactive == PETSC_FALSE) { 1404 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called"); 1405 } 1406 baij->getrowactive = PETSC_FALSE; 1407 PetscFunctionReturn(0); 1408 } 1409 1410 #undef __FUNC__ 1411 #define __FUNC__ "MatGetBlockSize_MPIBAIJ" 1412 int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs) 1413 { 1414 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1415 1416 PetscFunctionBegin; 1417 *bs = baij->bs; 1418 PetscFunctionReturn(0); 1419 } 1420 1421 #undef __FUNC__ 1422 #define __FUNC__ "MatZeroEntries_MPIBAIJ" 1423 int MatZeroEntries_MPIBAIJ(Mat A) 1424 { 1425 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 1426 int ierr; 1427 1428 PetscFunctionBegin; 1429 ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 1430 ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 1431 PetscFunctionReturn(0); 1432 } 1433 
1434 #undef __FUNC__ 1435 #define __FUNC__ "MatGetInfo_MPIBAIJ" 1436 int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1437 { 1438 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) matin->data; 1439 Mat A = a->A, B = a->B; 1440 int ierr; 1441 double isend[5], irecv[5]; 1442 1443 PetscFunctionBegin; 1444 info->block_size = (double)a->bs; 1445 ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr); 1446 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1447 isend[3] = info->memory; isend[4] = info->mallocs; 1448 ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr); 1449 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1450 isend[3] += info->memory; isend[4] += info->mallocs; 1451 if (flag == MAT_LOCAL) { 1452 info->nz_used = isend[0]; 1453 info->nz_allocated = isend[1]; 1454 info->nz_unneeded = isend[2]; 1455 info->memory = isend[3]; 1456 info->mallocs = isend[4]; 1457 } else if (flag == MAT_GLOBAL_MAX) { 1458 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr); 1459 info->nz_used = irecv[0]; 1460 info->nz_allocated = irecv[1]; 1461 info->nz_unneeded = irecv[2]; 1462 info->memory = irecv[3]; 1463 info->mallocs = irecv[4]; 1464 } else if (flag == MAT_GLOBAL_SUM) { 1465 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr); 1466 info->nz_used = irecv[0]; 1467 info->nz_allocated = irecv[1]; 1468 info->nz_unneeded = irecv[2]; 1469 info->memory = irecv[3]; 1470 info->mallocs = irecv[4]; 1471 } 1472 info->rows_global = (double)a->M; 1473 info->columns_global = (double)a->N; 1474 info->rows_local = (double)a->m; 1475 info->columns_local = (double)a->N; 1476 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1477 info->fill_ratio_needed = 0; 1478 info->factor_mallocs = 0; 1479 PetscFunctionReturn(0); 1480 } 1481 1482 #undef __FUNC__ 1483 #define __FUNC__ "MatSetOption_MPIBAIJ" 1484 int MatSetOption_MPIBAIJ(Mat A,MatOption op) 1485 { 1486 
Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1487 1488 PetscFunctionBegin; 1489 if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 1490 op == MAT_YES_NEW_NONZERO_LOCATIONS || 1491 op == MAT_COLUMNS_UNSORTED || 1492 op == MAT_COLUMNS_SORTED || 1493 op == MAT_NEW_NONZERO_ALLOCATION_ERROR || 1494 op == MAT_NEW_NONZERO_LOCATION_ERROR) { 1495 MatSetOption(a->A,op); 1496 MatSetOption(a->B,op); 1497 } else if (op == MAT_ROW_ORIENTED) { 1498 a->roworiented = 1; 1499 MatSetOption(a->A,op); 1500 MatSetOption(a->B,op); 1501 } else if (op == MAT_ROWS_SORTED || 1502 op == MAT_ROWS_UNSORTED || 1503 op == MAT_SYMMETRIC || 1504 op == MAT_STRUCTURALLY_SYMMETRIC || 1505 op == MAT_YES_NEW_DIAGONALS || 1506 op == MAT_USE_HASH_TABLE) 1507 PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 1508 else if (op == MAT_COLUMN_ORIENTED) { 1509 a->roworiented = 0; 1510 MatSetOption(a->A,op); 1511 MatSetOption(a->B,op); 1512 } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 1513 a->donotstash = 1; 1514 } else if (op == MAT_NO_NEW_DIAGONALS) { 1515 SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 1516 } else if (op == MAT_USE_HASH_TABLE) { 1517 a->ht_flag = 1; 1518 } else { 1519 SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 1520 } 1521 PetscFunctionReturn(0); 1522 } 1523 1524 #undef __FUNC__ 1525 #define __FUNC__ "MatTranspose_MPIBAIJ(" 1526 int MatTranspose_MPIBAIJ(Mat A,Mat *matout) 1527 { 1528 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data; 1529 Mat_SeqBAIJ *Aloc; 1530 Mat B; 1531 int ierr,M=baij->M,N=baij->N,*ai,*aj,i,*rvals,j,k,col; 1532 int bs=baij->bs,mbs=baij->mbs; 1533 Scalar *a; 1534 1535 PetscFunctionBegin; 1536 if (matout == PETSC_NULL && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place"); 1537 ierr = MatCreateMPIBAIJ(A->comm,baij->bs,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,PETSC_NULL,&B); 1538 CHKERRQ(ierr); 1539 1540 /* copy over the A part */ 1541 Aloc = (Mat_SeqBAIJ*) baij->A->data; 1542 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1543 rvals = (int *) 
PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals); 1544 1545 for ( i=0; i<mbs; i++ ) { 1546 rvals[0] = bs*(baij->rstart + i); 1547 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 1548 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1549 col = (baij->cstart+aj[j])*bs; 1550 for (k=0; k<bs; k++ ) { 1551 ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1552 col++; a += bs; 1553 } 1554 } 1555 } 1556 /* copy over the B part */ 1557 Aloc = (Mat_SeqBAIJ*) baij->B->data; 1558 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1559 for ( i=0; i<mbs; i++ ) { 1560 rvals[0] = bs*(baij->rstart + i); 1561 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 1562 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1563 col = baij->garray[aj[j]]*bs; 1564 for (k=0; k<bs; k++ ) { 1565 ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1566 col++; a += bs; 1567 } 1568 } 1569 } 1570 PetscFree(rvals); 1571 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1572 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1573 1574 if (matout != PETSC_NULL) { 1575 *matout = B; 1576 } else { 1577 PetscOps *Abops; 1578 MatOps Aops; 1579 1580 /* This isn't really an in-place transpose .... but free data structures from baij */ 1581 PetscFree(baij->rowners); 1582 ierr = MatDestroy(baij->A); CHKERRQ(ierr); 1583 ierr = MatDestroy(baij->B); CHKERRQ(ierr); 1584 if (baij->colmap) PetscFree(baij->colmap); 1585 if (baij->garray) PetscFree(baij->garray); 1586 if (baij->lvec) VecDestroy(baij->lvec); 1587 if (baij->Mvctx) VecScatterDestroy(baij->Mvctx); 1588 PetscFree(baij); 1589 1590 /* 1591 This is horrible, horrible code. We need to keep the 1592 A pointers for the bops and ops but copy everything 1593 else from C. 
1594 */ 1595 Abops = A->bops; 1596 Aops = A->ops; 1597 PetscMemcpy(A,B,sizeof(struct _p_Mat)); 1598 A->bops = Abops; 1599 A->ops = Aops; 1600 1601 PetscHeaderDestroy(B); 1602 } 1603 PetscFunctionReturn(0); 1604 } 1605 1606 #undef __FUNC__ 1607 #define __FUNC__ "MatDiagonalScale_MPIBAIJ" 1608 int MatDiagonalScale_MPIBAIJ(Mat A,Vec ll,Vec rr) 1609 { 1610 Mat a = ((Mat_MPIBAIJ *) A->data)->A; 1611 Mat b = ((Mat_MPIBAIJ *) A->data)->B; 1612 int ierr,s1,s2,s3; 1613 1614 PetscFunctionBegin; 1615 if (ll) { 1616 ierr = VecGetLocalSize(ll,&s1); CHKERRQ(ierr); 1617 ierr = MatGetLocalSize(A,&s2,&s3); CHKERRQ(ierr); 1618 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"non-conforming local sizes"); 1619 ierr = MatDiagonalScale(a,ll,0); CHKERRQ(ierr); 1620 ierr = MatDiagonalScale(b,ll,0); CHKERRQ(ierr); 1621 } 1622 if (rr) SETERRQ(PETSC_ERR_SUP,0,"not supported for right vector"); 1623 PetscFunctionReturn(0); 1624 } 1625 1626 #undef __FUNC__ 1627 #define __FUNC__ "MatZeroRows_MPIBAIJ" 1628 int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag) 1629 { 1630 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 1631 int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 1632 int *procs,*nprocs,j,found,idx,nsends,*work,row; 1633 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 1634 int *rvalues,tag = A->tag,count,base,slen,n,*source; 1635 int *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs; 1636 MPI_Comm comm = A->comm; 1637 MPI_Request *send_waits,*recv_waits; 1638 MPI_Status recv_status,*send_status; 1639 IS istmp; 1640 PetscTruth localdiag; 1641 1642 PetscFunctionBegin; 1643 ierr = ISGetSize(is,&N); CHKERRQ(ierr); 1644 ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 1645 1646 /* first count number of contributors to each processor */ 1647 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 1648 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 1649 owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 1650 for ( i=0; i<N; i++ ) { 
1651 idx = rows[i]; 1652 found = 0; 1653 for ( j=0; j<size; j++ ) { 1654 if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 1655 nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 1656 } 1657 } 1658 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range"); 1659 } 1660 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 1661 1662 /* inform other processors of number of messages and max length*/ 1663 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 1664 ierr = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 1665 nrecvs = work[rank]; 1666 ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 1667 nmax = work[rank]; 1668 PetscFree(work); 1669 1670 /* post receives: */ 1671 rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); CHKPTRQ(rvalues); 1672 recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 1673 for ( i=0; i<nrecvs; i++ ) { 1674 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 1675 } 1676 1677 /* do sends: 1678 1) starts[i] gives the starting index in svalues for stuff going to 1679 the ith processor 1680 */ 1681 svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 1682 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 1683 starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 1684 starts[0] = 0; 1685 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 1686 for ( i=0; i<N; i++ ) { 1687 svalues[starts[owner[i]]++] = rows[i]; 1688 } 1689 ISRestoreIndices(is,&rows); 1690 1691 starts[0] = 0; 1692 for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 1693 count = 0; 1694 for ( i=0; i<size; i++ ) { 1695 if (procs[i]) { 1696 ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1697 } 1698 } 1699 PetscFree(starts); 1700 1701 base = 
owners[rank]*bs; 1702 1703 /* wait on receives */ 1704 lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 1705 source = lens + nrecvs; 1706 count = nrecvs; slen = 0; 1707 while (count) { 1708 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1709 /* unpack receives into our local space */ 1710 ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 1711 source[imdex] = recv_status.MPI_SOURCE; 1712 lens[imdex] = n; 1713 slen += n; 1714 count--; 1715 } 1716 PetscFree(recv_waits); 1717 1718 /* move the data into the send scatter */ 1719 lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 1720 count = 0; 1721 for ( i=0; i<nrecvs; i++ ) { 1722 values = rvalues + i*nmax; 1723 for ( j=0; j<lens[i]; j++ ) { 1724 lrows[count++] = values[j] - base; 1725 } 1726 } 1727 PetscFree(rvalues); PetscFree(lens); 1728 PetscFree(owner); PetscFree(nprocs); 1729 1730 /* actually zap the local rows */ 1731 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 1732 PLogObjectParent(A,istmp); 1733 1734 /* 1735 Zero the required rows. If the "diagonal block" of the matrix 1736 is square and the user wishes to set the diagonal we use seperate 1737 code so that MatSetValues() is not called for each diagonal allocating 1738 new memory, thus calling lots of mallocs and slowing things down. 
1739 1740 Contributed by: Mathew Knepley 1741 */ 1742 localdiag = PETSC_FALSE; 1743 if (diag && (l->A->M == l->A->N)) { 1744 localdiag = PETSC_TRUE; 1745 ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 1746 } else { 1747 ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr); 1748 } 1749 ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 1750 ierr = ISDestroy(istmp); CHKERRQ(ierr); 1751 1752 if (diag && (localdiag == PETSC_FALSE)) { 1753 for ( i = 0; i < slen; i++ ) { 1754 row = lrows[i] + rstart_bs; 1755 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES); CHKERRQ(ierr); 1756 } 1757 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1758 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1759 } 1760 PetscFree(lrows); 1761 1762 /* wait on sends */ 1763 if (nsends) { 1764 send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 1765 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1766 PetscFree(send_status); 1767 } 1768 PetscFree(send_waits); PetscFree(svalues); 1769 1770 PetscFunctionReturn(0); 1771 } 1772 1773 extern int MatPrintHelp_SeqBAIJ(Mat); 1774 #undef __FUNC__ 1775 #define __FUNC__ "MatPrintHelp_MPIBAIJ" 1776 int MatPrintHelp_MPIBAIJ(Mat A) 1777 { 1778 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 1779 MPI_Comm comm = A->comm; 1780 static int called = 0; 1781 int ierr; 1782 1783 PetscFunctionBegin; 1784 if (!a->rank) { 1785 ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr); 1786 } 1787 if (called) {PetscFunctionReturn(0);} else called = 1; 1788 (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n"); 1789 (*PetscHelpPrintf)(comm," -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n"); 1790 PetscFunctionReturn(0); 1791 } 1792 1793 #undef __FUNC__ 1794 #define __FUNC__ "MatSetUnfactored_MPIBAIJ" 1795 int MatSetUnfactored_MPIBAIJ(Mat A) 1796 { 1797 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 1798 int ierr; 1799 1800 PetscFunctionBegin; 1801 ierr = 
MatSetUnfactored(a->A); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static int MatConvertSameType_MPIBAIJ(Mat,Mat *,int);

/* -------------------------------------------------------------------*/
/*
   Function dispatch table for MATMPIBAIJ; the slot order follows the
   struct _MatOps layout.  Zero entries are operations not supported by
   this matrix type.
*/
static struct _MatOps MatOps_Values = {
  MatSetValues_MPIBAIJ,
  MatGetRow_MPIBAIJ,
  MatRestoreRow_MPIBAIJ,
  MatMult_MPIBAIJ,
  MatMultAdd_MPIBAIJ,
  MatMultTrans_MPIBAIJ,
  MatMultTransAdd_MPIBAIJ,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  MatTranspose_MPIBAIJ,
  MatGetInfo_MPIBAIJ,
  0,
  MatGetDiagonal_MPIBAIJ,
  MatDiagonalScale_MPIBAIJ,
  MatNorm_MPIBAIJ,
  MatAssemblyBegin_MPIBAIJ,
  MatAssemblyEnd_MPIBAIJ,
  0,
  MatSetOption_MPIBAIJ,
  MatZeroEntries_MPIBAIJ,
  MatZeroRows_MPIBAIJ,
  0,
  0,
  0,
  0,
  MatGetSize_MPIBAIJ,
  MatGetLocalSize_MPIBAIJ,
  MatGetOwnershipRange_MPIBAIJ,
  0,
  0,
  0,
  0,
  MatConvertSameType_MPIBAIJ,
  0,
  0,
  0,
  0,
  0,
  MatGetSubMatrices_MPIBAIJ,
  MatIncreaseOverlap_MPIBAIJ,
  MatGetValues_MPIBAIJ,
  0,
  MatPrintHelp_MPIBAIJ,
  MatScale_MPIBAIJ,
  0,
  0,
  0,
  MatGetBlockSize_MPIBAIJ,
  0,
  0,
  0,
  0,
  0,
  0,
  MatSetUnfactored_MPIBAIJ,
  0,
  MatSetValuesBlocked_MPIBAIJ,
  0,
  0,
  0,
  MatGetMaps_Petsc};


#undef __FUNC__
#define __FUNC__ "MatCreateMPIBAIJ"
/*@C
   MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
   (block compressed row).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs   - size of block
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
           This value should be the same as the local size used in creating the
           y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
           This value should be the same as the local size used in creating the
           x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz  - number of block nonzeros per block row in diagonal portion of local
           submatrix  (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the diagonal portion of the local submatrix (possibly different for
           each block row) or PETSC_NULL.  You must leave room for the diagonal
           entry even if it is zero.
.  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
           submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or PETSC_NULL.

   Output Parameter:
.  A - the matrix

   Options Database Keys:
.  -mat_no_unroll - uses code that does not unroll the loops in the
                    block calculations (much slower)
.  -mat_block_size - size of the blocks to use
.  -mat_mpi - use the parallel matrix data structures even on one processor
              (defaults to using SeqBAIJ format on one processor)

   Notes:
   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          -------------------
   row 3  |  o o o d d d o o o o o o
   row 4  |  o o o d d d o o o o o o
   row 5  |  o o o d d d o o o o o o
          -------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
   stored simply in the MATSEQBAIJ format for compressed row storage.

   Now d_nz should indicate the number of block nonzeros per row in the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.  For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIAIJ()
@*/
int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,
                     int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
{
  Mat         B;
  Mat_MPIBAIJ *b;
  int         ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size,flg;
  int         flag1 = 0,flag2 = 0;

  PetscFunctionBegin;
  if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid block size specified, must be positive");

  MPI_Comm_size(comm,&size);
  ierr = OptionsHasName(PETSC_NULL,"-mat_mpibaij",&flag1); CHKERRQ(ierr);
  ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2); CHKERRQ(ierr);
  /* On one processor, unless the user explicitly asked for the parallel
     format, fall back to the (faster) sequential BAIJ format.
     NOTE(review): the o_nz/o_nnz arguments are ignored on this path,
     since a sequential matrix has no off-diagonal portion. */
  if (!flag1 && !flag2 && size == 1) {
    if (M == PETSC_DECIDE) M = m;
    if (N == PETSC_DECIDE) N = n;
    ierr = MatCreateSeqBAIJ(comm,bs,M,N,d_nz,d_nnz,A); CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  *A = 0;
  PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,comm,MatDestroy,MatView);
  PLogObjectCreate(B);
  B->data = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b);
  PetscMemzero(b,sizeof(Mat_MPIBAIJ));
  PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));

  B->ops->destroy = MatDestroy_MPIBAIJ;
  B->ops->view    = MatView_MPIBAIJ;
  B->mapping      = 0;
  B->factor       = 0;
  B->assembled    = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  MPI_Comm_rank(comm,&b->rank);
  MPI_Comm_size(comm,&b->size);

  /* Validate the dimension arguments; at least one of each local/global
     pair must be given, and per-row preallocation requires a known m. */
  if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
  }
  if ( M == PETSC_DECIDE && m == PETSC_DECIDE) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either M or m should be specified");
  }
  if ( N == PETSC_DECIDE && n == PETSC_DECIDE) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either N or n should be specified");
  }
  /* If both local and global sizes are given, discard the global ones and
     recompute them from the local sizes below (local sizes take priority). */
  if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE;
  if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE;

  /* Global sizes are the sum of the local sizes over the communicator */
  if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
    work[0] = m; work[1] = n;
    mbs = m/bs; nbs = n/bs;
    ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr);
    if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;}
    if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;}
  }
  /* Local sizes not given: split the global block rows/cols as evenly as
     possible, with the first (Mbs % size) ranks getting one extra block. */
  if (m == PETSC_DECIDE) {
    Mbs = M/bs;
    if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global rows must be divisible by blocksize");
    mbs = Mbs/b->size + ((Mbs % b->size) > b->rank);
    m   = mbs*bs;
  }
  if (n == PETSC_DECIDE) {
    Nbs = N/bs;
    if (Nbs*bs != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global cols must be divisible by blocksize");
    nbs = Nbs/b->size + ((Nbs % b->size) > b->rank);
    n   = nbs*bs;
  }
  if (mbs*bs != m || nbs*bs != n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of local rows, cols must be divisible by blocksize");
  }

  b->m = m; B->m = m;
  b->n = n; B->n = n;
  b->N = N; B->N = N;
  b->M = M; B->M = M;
  b->bs  = bs;
  b->bs2 = bs*bs;
  b->mbs = mbs;
  b->nbs = nbs;
  b->Mbs = Mbs;
  b->Nbs = Nbs;

  /* the information in the maps duplicates the information computed below, eventually
     we should remove the duplicate information that is not contained in the maps */
  ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr);
  ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships; rowners/cowners are
     prefix sums of the per-rank block counts (rowners[r]..rowners[r+1] is
     the block-row range owned by rank r) */
  b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
  PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
  b->cowners = b->rowners + b->size + 2;  /* second half of the same allocation */
  ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  b->rowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->rowners[i] += b->rowners[i-1];
  }
  b->rstart    = b->rowners[b->rank];
  b->rend      = b->rowners[b->rank+1];
  b->rstart_bs = b->rstart * bs;  /* ownership range in scalar (point) rows */
  b->rend_bs   = b->rend * bs;

  ierr = MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  b->cowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->cowners[i] += b->cowners[i-1];
  }
  b->cstart    = b->cowners[b->rank];
  b->cend      = b->cowners[b->rank+1];
  b->cstart_bs = b->cstart * bs;
  b->cend_bs   = b->cend * bs;


  /* Create the two local pieces: A (diagonal block, m x n) and B
     (off-diagonal block, stored with the full global column space m x N) */
  if (d_nz == PETSC_DEFAULT) d_nz = 5;
  ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
  PLogObjectParent(B,b->A);
  if (o_nz == PETSC_DEFAULT) o_nz = 0;
  ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
  PLogObjectParent(B,b->B);

  /* build cache for off array entries formed */
  ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
  b->donotstash  = 0;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = 1;

  /* stuff used in block assembly */
  b->barray = 0;

  /* stuff used for matrix vector multiply */
  b->lvec  = 0;
  b->Mvctx = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = 0;
  b->hd           = 0;
  b->ht_size      = 0;
  b->ht_flag      = 0;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  *A = B;
  ierr = OptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg); CHKERRQ(ierr);
  if (flg) {
    double fact = 1.39;  /* default hash-table oversize factor */
    ierr = MatSetOption(B,MAT_USE_HASH_TABLE); CHKERRQ(ierr);
    ierr = OptionsGetDouble(PETSC_NULL,"-mat_use_hash_table",&fact,&flg); CHKERRQ(ierr);
    if (fact <= 1.0) fact = 1.39;  /* factor must exceed 1.0 to be useful */
    ierr = MatMPIBAIJSetHashTableFactor(B,fact); CHKERRQ(ierr);
    PLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatConvertSameType_MPIBAIJ"
/*
   Duplicates an MPIBAIJ matrix: builds a new Mat header, copies the
   sizes/ownership tables, and converts the two sequential pieces (A and B)
   with MATSAME.

   NOTE(review): the cpvalues argument is never referenced in this routine;
   values appear to be copied unconditionally via MatConvert(...,MATSAME,...)
   -- confirm against the MatConvertSameType contract.
   NOTE(review): the rstart_bs/rend_bs/cstart_bs/cend_bs fields set by
   MatCreateMPIBAIJ are not copied here -- TODO confirm they are not read
   from a duplicated matrix.
*/
static int MatConvertSameType_MPIBAIJ(Mat matin,Mat *newmat,int cpvalues)
{
  Mat         mat;
  Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data;
  int         ierr, len=0, flg;

  PetscFunctionBegin;
  *newmat = 0;
  PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,matin->comm,MatDestroy,MatView);
  PLogObjectCreate(mat);
  mat->data = (void *) (a = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(a);
  PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));
  mat->ops->destroy = MatDestroy_MPIBAIJ;
  mat->ops->view    = MatView_MPIBAIJ;
  mat->factor       = matin->factor;
  mat->assembled    = PETSC_TRUE;

  /* copy sizes (both into the Mat header and the implementation struct) */
  a->m = mat->m = oldmat->m;
  a->n = mat->n = oldmat->n;
  a->M = mat->M = oldmat->M;
  a->N = mat->N = oldmat->N;

  a->bs  = oldmat->bs;
  a->bs2 = oldmat->bs2;
  a->mbs = oldmat->mbs;
  a->nbs = oldmat->nbs;
  a->Mbs = oldmat->Mbs;
  a->Nbs = oldmat->Nbs;

  a->rstart = oldmat->rstart;
  a->rend   = oldmat->rend;
  a->cstart = oldmat->cstart;
  a->cend   = oldmat->cend;
  a->size   = oldmat->size;
  a->rank   = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;
  a->barray       = 0;

  /* hash table stuff: counters start fresh, configuration is inherited */
  a->ht           = 0;
  a->hd           = 0;
  a->ht_size      = 0;
  a->ht_flag      = oldmat->ht_flag;
  a->ht_fact      = oldmat->ht_fact;
  a->ht_total_ct  = 0;
  a->ht_insert_ct = 0;


  /* copy the ownership tables (rowners and cowners share one allocation,
     same layout as in MatCreateMPIBAIJ) */
  a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
  a->cowners = a->rowners + a->size + 2;
  PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
  ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
  /* duplicate the global-to-local column map, if it has been built */
  if (oldmat->colmap) {
    a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap);
    PLogObjectMemory(mat,(a->Nbs)*sizeof(int));
    PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));
  } else a->colmap = 0;
  /* duplicate the list of global column indices of the off-diagonal part */
  if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) {
    a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray);
    PLogObjectMemory(mat,len*sizeof(int));
    PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
  } else a->garray = 0;

  /* duplicate the communication machinery and the two sequential pieces */
  ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
  PLogObjectParent(mat,a->lvec);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
  PLogObjectParent(mat,a->Mvctx);
  ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
  PLogObjectParent(mat,a->B);
  ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
  if (flg) {
    ierr = MatPrintHelp(mat); CHKERRQ(ierr);
  }
  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "sys.h"

#undef __FUNC__
#define __FUNC__ "MatLoad_MPIBAIJ"
/*
   Loads an MPIBAIJ matrix from a PETSc binary viewer.

   Rank 0 reads the file header, row lengths, column indices, and values,
   and ships each rank its slice (MPI_Scatterv for row lengths, point-to-
   point sends for indices and values).  If the global row count is not
   divisible by the block size, extra padding rows with a single unit
   diagonal entry are appended so the padded matrix is block-aligned.
*/
int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat)
{
  Mat          A;
  int          i, nz, ierr, j,rstart, rend, fd;
  Scalar       *vals,*buf;
  MPI_Comm     comm = ((PetscObject)viewer)->comm;
  MPI_Status   status;
  int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
  int          *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf;
  int          flg,tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows;
  int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
  int          dcount,kmax,k,nzcount,tmp;

  PetscFunctionBegin;
  ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);

  MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
  /* only rank 0 touches the file; header = {cookie, M, N, nz-format flag} */
  if (!rank) {
    ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
    if (header[3] < 0) {
      SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as MPIBAIJ");
    }
  }

  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];

  if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices");

  /*
     This code adds extra rows to make sure the number of rows is
     divisible by the blocksize
  */
  Mbs        = M/bs;
  extra_rows = bs - M + bs*(Mbs);  /* rows needed to pad M up to a block multiple */
  if (extra_rows == bs) extra_rows = 0;  /* already divisible */
  else                  Mbs++;
  if (extra_rows &&!rank) {
    PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n");
  }

  /* determine ownership of all rows (same even split as MatCreateMPIBAIJ);
     browners holds the same prefix sums scaled to point rows.
     NOTE(review): browners = rowners + size + 1 here, while MatCreateMPIBAIJ
     offsets its second table by size + 2 within a 2*(size+2) allocation;
     both fit, but the layouts differ -- confirm intentional. */
  mbs = Mbs/size + ((Mbs % size) > rank);
  m   = mbs * bs;
  rowners  = (int *) PetscMalloc(2*(size+2)*sizeof(int)); CHKPTRQ(rowners);
  browners = rowners + size + 1;
  ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1];
  for ( i=0; i<=size; i++ ) browners[i] = rowners[i]*bs;
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) ); CHKPTRQ(locrowlens);
  if (!rank) {
    rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) ); CHKPTRQ(rowlengths);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr);
    for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;  /* padding rows: one entry each */
    sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
    PetscFree(sndcounts);
  } else {
    /* non-root ranks: send buffer arguments are ignored by MPI_Scatterv */
    ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
    PetscMemzero(procsnz,size*sizeof(int));
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    PetscFree(rowlengths);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);

    /* read in my part of the matrix column indices */
    nz   = procsnz[0];
    ibuf = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
    mycols = ibuf;
    /* on one processor the padding entries are not on disk; read fewer and
       append the diagonal indices of the padding rows by hand */
    if (size == 1) nz -= extra_rows;
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr);
    if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }

    /* read in parts for all other processors (except the last) and ship off */
    for ( i=1; i<size-1; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    /* read in the stuff for the last proc */
    if ( size != 1 ) {
      nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr);
      for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i;
      ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
    }
    PetscFree(cols);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += locrowlens[i];
    }
    ibuf = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
    mycols = ibuf;
    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries;
     mask[] flags block columns already seen in the current block row,
     masked1/masked2 record which entries to reset afterwards */
  dlens  = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) ); CHKPTRQ(dlens);
  odlens = dlens + (rend-rstart);
  mask   = (int *) PetscMalloc( 3*Mbs*sizeof(int) ); CHKPTRQ(mask);
  PetscMemzero(mask,3*Mbs*sizeof(int));
  masked1 = mask    + Mbs;
  masked2 = masked1 + Mbs;
  rowcount = 0; nzcount = 0;
  for ( i=0; i<mbs; i++ ) {
    dcount  = 0;
    odcount = 0;
    for ( j=0; j<bs; j++ ) {
      kmax = locrowlens[rowcount];
      for ( k=0; k<kmax; k++ ) {
        tmp = mycols[nzcount++]/bs;  /* block column of this entry */
        if (!mask[tmp]) {
          mask[tmp] = 1;
          /* classify: inside our (block) ownership range -> diagonal part */
          if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp;
          else masked1[dcount++] = tmp;
        }
      }
      rowcount++;
    }

    dlens[i]  = dcount;
    odlens[i] = odcount;

    /* zero out the mask elements we set */
    for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0;
    for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0;
  }

  /* create our matrix, preallocated from the counts just computed */
  ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);
  CHKERRQ(ierr);
  A = *newmat;
  MatSetOption(A,MAT_COLUMNS_SORTED);

  if (!rank) {
    buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(buf);
    /* read in my part of the matrix numerical values */
    nz     = procsnz[0];
    vals   = buf;
    mycols = ibuf;
    if (size == 1) nz -= extra_rows;
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
    if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }  /* unit diagonal for padding rows */

    /* insert into matrix, one point row at a time */
    jj = rstart*bs;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
    /* read in other processors (except the last one) and ship out */
    for ( i=1; i<size-1; i++ ) {
      nz   = procsnz[i];
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    /* the last proc
       NOTE(review): relies on i == size-1 from the loop above (holds for
       size >= 2, and this branch only runs then); the index-shipping phase
       above spells the same thing as procsnz[size-1] -- prefer that form. */
    if ( size != 1 ){
      nz   = procsnz[i] - extra_rows;
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
      for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0;
      ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
    }
    PetscFree(procsnz);
  } else {
    /* receive numeric values; nz still holds this rank's nonzero count */
    buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(buf);

    /* receive message of values */
    vals   = buf;
    mycols = ibuf;
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");

    /* insert into matrix */
    jj = rstart*bs;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
  }
  PetscFree(locrowlens);
  PetscFree(buf);
  PetscFree(ibuf);
  PetscFree(rowners);
  PetscFree(dlens);
  PetscFree(mask);
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}



#undef __FUNC__
#define __FUNC__ "MatMPIBAIJSetHashTableFactor"
/*@
   MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the
   size of the HashTable.

   Collective on Mat

   Input Parameters:
+  mat  - the matrix
-  fact - factor

   Notes:
   This can also be set by the command line option: -mat_use_hash_table fact

.keywords: matrix, hashtable, factor, HT

.seealso: MatSetOption()
@*/
int MatMPIBAIJSetHashTableFactor(Mat mat,double fact)
{
  Mat_MPIBAIJ *baij;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_COOKIE);
  if (mat->type != MATMPIBAIJ) {
    SETERRQ(PETSC_ERR_ARG_WRONG,1,"Incorrect matrix type. Use MPIBAIJ only.");
  }
  baij = (Mat_MPIBAIJ*) mat->data;
  baij->ht_fact = fact;
  PetscFunctionReturn(0);
}