1 #ifndef lint 2 static char vcid[] = "$Id: mpibaij.c,v 1.68 1997/05/03 22:43:28 curfman Exp curfman $"; 3 #endif 4 5 #include "pinclude/pviewer.h" 6 #include "src/mat/impls/baij/mpi/mpibaij.h" 7 #include "src/vec/vecimpl.h" 8 9 10 extern int MatSetUpMultiply_MPIBAIJ(Mat); 11 extern int DisAssemble_MPIBAIJ(Mat); 12 extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int); 13 extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatGetSubMatrixCall,Mat **); 14 extern int MatLUFactorSymbolic_MPIBAIJ(Mat,IS,IS,double,Mat *); 15 extern int MatLUFactorNumeric_MPIBAIJ(Mat,Mat *); 16 extern int MatLUFactor_MPIBAIJ(Mat,IS,IS,double); 17 extern int MatSolve_MPIBAIJ(Mat,Vec,Vec); 18 extern int MatSolveAdd_MPIBAIJ(Mat,Vec,Vec,Vec); 19 extern int MatSolveTrans_MPIBAIJ(Mat,Vec,Vec); 20 extern int MatSolveTransAdd_MPIBAIJ(Mat,Vec,Vec,Vec); 21 extern int MatILUFactorSymbolic_MPIBAIJ(Mat,IS,IS,double,int,Mat *); 22 23 24 /* 25 Local utility routine that creates a mapping from the global column 26 number to the local number in the off-diagonal part of the local 27 storage of the matrix. This is done in a non scable way since the 28 length of colmap equals the global matrix length. 29 */ 30 #undef __FUNC__ 31 #define __FUNC__ "CreateColmap_MPIBAIJ_Private" 32 static int CreateColmap_MPIBAIJ_Private(Mat mat) 33 { 34 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 35 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data; 36 int nbs = B->nbs,i,bs=B->bs;; 37 38 baij->colmap = (int *) PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap); 39 PLogObjectMemory(mat,baij->Nbs*sizeof(int)); 40 PetscMemzero(baij->colmap,baij->Nbs*sizeof(int)); 41 for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1; 42 return 0; 43 } 44 45 #undef __FUNC__ 46 #define __FUNC__ "MatGetRowIJ_MPIBAIJ(" 47 static int MatGetRowIJ_MPIBAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja, 48 PetscTruth *done) 49 { 50 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *) mat->data; 51 int ierr; 52 if (aij->size == 1) { 53 ierr = MatGetRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr); 54 } else SETERRQ(1,0,"not supported in parallel"); 55 return 0; 56 } 57 58 #undef __FUNC__ 59 #define __FUNC__ "MatRestoreRowIJ_MPIBAIJ" 60 static int MatRestoreRowIJ_MPIBAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja, 61 PetscTruth *done) 62 { 63 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *) mat->data; 64 int ierr; 65 if (aij->size == 1) { 66 ierr = MatRestoreRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr); 67 } else SETERRQ(1,0,"not supported in parallel"); 68 return 0; 69 } 70 #define CHUNKSIZE 10 71 72 #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 73 { \ 74 \ 75 brow = row/bs; \ 76 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 77 rmax = aimax[brow]; nrow = ailen[brow]; \ 78 bcol = col/bs; \ 79 ridx = row % bs; cidx = col % bs; \ 80 low = 0; high = nrow; \ 81 while (high-low > 3) { \ 82 t = (low+high)/2; \ 83 if (rp[t] > bcol) high = t; \ 84 else low = t; \ 85 } \ 86 for ( _i=low; _i<high; _i++ ) { \ 87 if (rp[_i] > bcol) break; \ 88 if (rp[_i] == bcol) { \ 89 bap = ap + bs2*_i + bs*cidx + ridx; \ 90 if (addv == ADD_VALUES) *bap += value; \ 91 else *bap = value; \ 92 goto a_noinsert; \ 93 } \ 94 } \ 95 if (a->nonew == 1) goto a_noinsert; \ 96 else if (a->nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \ 97 if (nrow >= rmax) { \ 98 /* there is no extra room in row, therefore enlarge */ \ 99 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 100 Scalar *new_a; \ 101 \ 102 if (a->nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \ 103 \ 104 /* malloc new storage space */ \ 105 len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); \ 106 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 107 new_j = (int *) (new_a + bs2*new_nz); \ 108 new_i = new_j + new_nz; \ 109 \ 110 /* copy over old data into new slots */ \ 111 for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} \ 112 for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 113 PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); \ 114 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \ 115 PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, \ 116 len*sizeof(int)); \ 117 PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); \ 118 PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \ 119 PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \ 120 aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar)); \ 121 /* free up old matrix storage */ \ 122 PetscFree(a->a); \ 123 if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \ 124 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 125 a->singlemalloc = 1; \ 126 \ 127 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 128 rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \ 129 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \ 130 a->maxnz += bs2*CHUNKSIZE; \ 131 a->reallocs++; \ 132 a->nz++; \ 133 } \ 134 N = nrow++ - 1; \ 135 /* shift up all the later entries in this row */ \ 136 for ( ii=N; ii>=_i; ii-- ) { \ 137 rp[ii+1] = rp[ii]; \ 138 PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \ 139 } \ 140 if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar)); \ 141 rp[_i] = bcol; \ 142 ap[bs2*_i + bs*cidx + ridx] = value; \ 143 a_noinsert:; \ 144 ailen[brow] = nrow; \ 145 } 146 147 #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 148 { \ 149 \ 150 brow = row/bs; \ 151 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 152 rmax = bimax[brow]; nrow = bilen[brow]; \ 153 bcol = col/bs; \ 154 ridx = row % bs; cidx = col % bs; \ 155 low = 0; high = nrow; \ 156 while (high-low > 3) { \ 157 t = (low+high)/2; \ 158 if (rp[t] > bcol) high = t; \ 159 else low = t; \ 160 } \ 161 for ( _i=low; _i<high; _i++ ) { \ 162 if (rp[_i] > bcol) break; \ 163 if (rp[_i] == bcol) { \ 164 bap = ap + bs2*_i + bs*cidx + ridx; \ 165 if (addv == ADD_VALUES) *bap += value; \ 166 else *bap = value; \ 167 goto b_noinsert; \ 168 } \ 169 } \ 170 if (b->nonew == 1) goto b_noinsert; \ 171 else if (b->nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \ 172 if (nrow >= rmax) { \ 173 /* there is no extra room in row, therefore enlarge */ \ 174 int new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 175 Scalar *new_a; \ 176 \ 177 if (b->nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \ 178 \ 179 /* malloc new storage space */ \ 180 len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(b->mbs+1)*sizeof(int); \ 181 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 182 new_j = (int *) (new_a + bs2*new_nz); \ 183 new_i = new_j + new_nz; \ 184 \ 185 /* copy over old data into new slots */ \ 186 for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = bi[ii];} \ 187 for ( ii=brow+1; ii<b->mbs+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 188 PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int)); \ 189 len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \ 190 PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow, \ 191 len*sizeof(int)); \ 192 PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(Scalar)); \ 193 PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \ 194 PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \ 195 ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(Scalar)); \ 196 /* free up old matrix storage */ \ 197 PetscFree(b->a); \ 198 if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \ 199 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 200 b->singlemalloc = 1; \ 201 \ 202 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 203 rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \ 204 PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \ 205 b->maxnz += bs2*CHUNKSIZE; \ 206 b->reallocs++; \ 207 b->nz++; \ 208 } \ 209 N = nrow++ - 1; \ 210 /* shift up all the later entries in this row */ \ 211 for ( ii=N; ii>=_i; ii-- ) { \ 212 rp[ii+1] = rp[ii]; \ 213 PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \ 214 } \ 215 if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar)); \ 216 rp[_i] = bcol; \ 217 ap[bs2*_i + bs*cidx + ridx] = value; \ 218 b_noinsert:; \ 219 bilen[brow] = nrow; \ 220 } 221 222 #undef __FUNC__ 223 #define __FUNC__ "MatSetValues_MPIBAIJ" 224 int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 225 { 226 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 227 Scalar value; 228 int ierr,i,j,row,col; 229 int roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ; 230 int rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs; 231 int cend_orig=baij->cend_bs,bs=baij->bs; 232 233 /* Some Variables required in the macro */ 234 Mat A = baij->A; 235 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) (A)->data; 236 int *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 237 Scalar *aa=a->a; 238 239 Mat B = baij->B; 240 Mat_SeqBAIJ *b = (Mat_SeqBAIJ *) (B)->data; 241 int *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 242 Scalar *ba=b->a; 243 244 int *rp,ii,nrow,_i,rmax,N,brow,bcol; 245 int low,high,t,ridx,cidx,bs2=a->bs2; 246 Scalar *ap,*bap; 247 248 for ( i=0; i<m; i++ ) { 249 #if defined(PETSC_BOPT_g) 250 if (im[i] < 0) SETERRQ(1,0,"Negative row"); 251 if (im[i] >= baij->M) SETERRQ(1,0,"Row too large"); 252 #endif 253 if (im[i] >= rstart_orig && im[i] < rend_orig) { 254 row = im[i] - rstart_orig; 255 for ( j=0; j<n; j++ ) { 256 if (in[j] >= cstart_orig && in[j] < cend_orig){ 257 col = in[j] - cstart_orig; 258 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 259 MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 260 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 261 } 262 #if defined(PETSC_BOPT_g) 263 else if (in[j] < 0) {SETERRQ(1,0,"Negative column");} 264 else if (in[j] >= baij->N) {SETERRQ(1,0,"Col too large");} 265 #endif 266 else { 267 if (mat->was_assembled) { 268 if (!baij->colmap) { 269 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 270 } 271 col = baij->colmap[in[j]/bs] - 1 + in[j]%bs; 272 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 273 ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 274 col = in[j]; 275 } 276 } 277 else col = in[j]; 278 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 279 MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 280 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 281 } 282 } 283 } 284 else { 285 if (roworiented && !baij->donotstash) { 286 ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 287 } 288 else { 289 if (!baij->donotstash) { 290 row = im[i]; 291 for ( j=0; j<n; j++ ) { 292 ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 293 } 294 } 295 } 296 } 297 } 298 return 0; 299 } 300 301 extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 302 #undef __FUNC__ 303 #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ" 304 int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 305 { 306 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 307 Scalar *value,*barray=baij->barray; 308 int ierr,i,j,ii,jj,row,col,k,l; 309 int roworiented = baij->roworiented,rstart=baij->rstart ; 310 int rend=baij->rend,cstart=baij->cstart,stepval; 311 int cend=baij->cend,bs=baij->bs,bs2=baij->bs2; 312 313 314 if(!barray) { 315 barray = (Scalar*) PetscMalloc(bs2*sizeof(Scalar)); CHKPTRQ(barray); 316 baij->barray = barray; 317 } 318 319 if (roworiented) { 320 stepval = (n-1)*bs; 321 } else { 322 stepval = (m-1)*bs; 323 } 324 for ( i=0; i<m; i++ ) { 325 #if defined(PETSC_BOPT_g) 326 if (im[i] < 0) SETERRQ(1,0,"Negative row"); 327 if (im[i] >= baij->Mbs) SETERRQ(1,0,"Row too large"); 328 #endif 329 if (im[i] >= rstart && im[i] < rend) { 330 row = im[i] - rstart; 331 for ( j=0; j<n; j++ ) { 332 /* If NumCol = 1 then a copy is not required */ 333 if ((roworiented) && (n == 1)) { 334 barray = v + i*bs2; 335 } else if((!roworiented) && (m == 1)) { 336 barray = v + j*bs2; 337 } else { /* Here a copy is required */ 338 if (roworiented) { 339 value = v + i*(stepval+bs)*bs + j*bs; 340 } else { 341 value = v + j*(stepval+bs)*bs + i*bs; 342 } 343 for ( ii=0; ii<bs; ii++,value+=stepval ) 344 for (jj=0; jj<bs; jj++ ) 345 *barray++ = *value++; 346 barray -=bs2; 347 } 348 349 if (in[j] >= cstart && in[j] < cend){ 350 col = in[j] - cstart; 351 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 352 } 353 #if defined(PETSC_BOPT_g) 354 else if (in[j] < 0) {SETERRQ(1,0,"Negative column");} 355 else if (in[j] >= baij->Nbs) {SETERRQ(1,0,"Col too large");} 356 #endif 357 else { 358 if (mat->was_assembled) { 359 if (!baij->colmap) { 360 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 361 } 362 col = baij->colmap[in[j]] - 1; 363 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 364 ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 365 col = in[j]; 366 } 367 } 368 else col = in[j]; 369 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 370 } 371 } 372 } 373 else { 374 if (!baij->donotstash) { 375 if (roworiented ) { 376 row = im[i]*bs; 377 value = v + i*(stepval+bs)*bs; 378 for ( j=0; j<bs; j++,row++ ) { 379 for ( k=0; k<n; k++ ) { 380 for ( col=in[k]*bs,l=0; l<bs; l++,col++) { 381 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr); 382 } 383 } 384 } 385 } 386 else { 387 for ( j=0; j<n; j++ ) { 388 value = v + j*(stepval+bs)*bs + i*bs; 389 col = in[j]*bs; 390 for ( k=0; k<bs; k++,col++,value+=stepval) { 391 for ( row = im[i]*bs,l=0; l<bs; l++,row++) { 392 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr); 393 } 394 } 395 } 396 397 } 398 } 399 } 400 } 401 return 0; 402 } 403 404 #undef __FUNC__ 405 #define __FUNC__ "MatGetValues_MPIBAIJ" 406 int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 407 { 408 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 409 int bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs; 410 int bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col; 411 412 for ( i=0; i<m; i++ ) { 413 if (idxm[i] < 0) SETERRQ(1,0,"Negative row"); 414 if (idxm[i] >= baij->M) SETERRQ(1,0,"Row too large"); 415 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 416 row = idxm[i] - bsrstart; 417 for ( j=0; j<n; j++ ) { 418 if (idxn[j] < 0) SETERRQ(1,0,"Negative column"); 419 if (idxn[j] >= baij->N) SETERRQ(1,0,"Col too large"); 420 if (idxn[j] >= bscstart && idxn[j] < bscend){ 421 col = idxn[j] - bscstart; 422 ierr = MatGetValues(baij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 423 } 424 else { 425 if (!baij->colmap) { 426 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 427 } 428 if((baij->colmap[idxn[j]/bs]-1 < 0) || 429 (baij->garray[(baij->colmap[idxn[j]/bs]-1)/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 430 else { 431 col = (baij->colmap[idxn[j]/bs]-1) + idxn[j]%bs; 432 ierr = MatGetValues(baij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 433 } 434 } 435 } 436 } 437 else { 438 SETERRQ(1,0,"Only local values currently supported"); 439 } 440 } 441 return 0; 442 } 443 444 #undef __FUNC__ 445 #define __FUNC__ "MatNorm_MPIBAIJ" 446 int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm) 447 { 448 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 449 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data; 450 int ierr, i,bs2=baij->bs2; 451 double sum = 0.0; 452 Scalar *v; 453 454 if (baij->size == 1) { 455 ierr = MatNorm(baij->A,type,norm); CHKERRQ(ierr); 456 } else { 457 if (type == NORM_FROBENIUS) { 458 v = amat->a; 459 for (i=0; i<amat->nz*bs2; i++ ) { 460 #if defined(PETSC_COMPLEX) 461 sum += real(conj(*v)*(*v)); v++; 462 #else 463 sum += (*v)*(*v); v++; 464 #endif 465 } 466 v = bmat->a; 467 for (i=0; i<bmat->nz*bs2; i++ ) { 468 #if defined(PETSC_COMPLEX) 469 sum += real(conj(*v)*(*v)); v++; 470 #else 471 sum += (*v)*(*v); v++; 472 #endif 473 } 474 MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm); 475 *norm = sqrt(*norm); 476 } 477 else 478 SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet"); 479 } 480 return 0; 481 } 482 483 #undef __FUNC__ 484 #define __FUNC__ "MatAssemblyBegin_MPIBAIJ" 485 int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 486 { 487 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 488 MPI_Comm comm = mat->comm; 489 int size = baij->size, *owners = baij->rowners,bs=baij->bs; 490 int rank = baij->rank,tag = mat->tag, *owner,*starts,count,ierr; 491 MPI_Request *send_waits,*recv_waits; 492 int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 493 InsertMode addv; 494 Scalar *rvalues,*svalues; 495 496 /* make sure all processors are either in INSERTMODE or ADDMODE */ 497 MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm); 498 if (addv == (ADD_VALUES|INSERT_VALUES)) { 499 SETERRQ(1,0,"Some processors inserted others added"); 500 } 501 mat->insertmode = addv; /* in case this processor had no cache */ 502 503 /* first count number of contributors to each processor */ 504 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 505 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 506 owner = (int *) PetscMalloc( (baij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 507 for ( i=0; i<baij->stash.n; i++ ) { 508 idx = baij->stash.idx[i]; 509 for ( j=0; j<size; j++ ) { 510 if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 511 nprocs[j]++; procs[j] = 1; owner[i] = j; break; 512 } 513 } 514 } 515 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 516 517 /* inform other processors of number of messages and max length*/ 518 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 519 MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm); 520 nreceives = work[rank]; 521 MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 522 nmax = work[rank]; 523 PetscFree(work); 524 525 /* post receives: 526 1) each message will consist of ordered pairs 527 (global index,value) we store the global index as a double 528 to simplify the message passing. 529 2) since we don't know how long each individual message is we 530 allocate the largest needed buffer for each receive. Potentially 531 this is a lot of wasted space. 532 533 534 This could be done better. 535 */ 536 rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar)); 537 CHKPTRQ(rvalues); 538 recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request)); 539 CHKPTRQ(recv_waits); 540 for ( i=0; i<nreceives; i++ ) { 541 MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag, 542 comm,recv_waits+i); 543 } 544 545 /* do sends: 546 1) starts[i] gives the starting index in svalues for stuff going to 547 the ith processor 548 */ 549 svalues = (Scalar *) PetscMalloc(3*(baij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 550 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 551 CHKPTRQ(send_waits); 552 starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 553 starts[0] = 0; 554 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 555 for ( i=0; i<baij->stash.n; i++ ) { 556 svalues[3*starts[owner[i]]] = (Scalar) baij->stash.idx[i]; 557 svalues[3*starts[owner[i]]+1] = (Scalar) baij->stash.idy[i]; 558 svalues[3*(starts[owner[i]]++)+2] = baij->stash.array[i]; 559 } 560 PetscFree(owner); 561 starts[0] = 0; 562 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 563 count = 0; 564 for ( i=0; i<size; i++ ) { 565 if (procs[i]) { 566 MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag, 567 comm,send_waits+count++); 568 } 569 } 570 PetscFree(starts); PetscFree(nprocs); 571 572 /* Free cache space */ 573 PLogInfo(mat,"MatAssemblyBegin_MPIBAIJ:Number of off-processor values %d\n",baij->stash.n); 574 ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr); 575 576 baij->svalues = svalues; baij->rvalues = rvalues; 577 baij->nsends = nsends; baij->nrecvs = nreceives; 578 baij->send_waits = send_waits; baij->recv_waits = recv_waits; 579 baij->rmax = nmax; 580 581 return 0; 582 } 583 584 585 #undef __FUNC__ 586 #define __FUNC__ "MatAssemblyEnd_MPIBAIJ" 587 int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 588 { 589 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 590 MPI_Status *send_status,recv_status; 591 int imdex,nrecvs = baij->nrecvs, count = nrecvs, i, n, ierr; 592 int bs=baij->bs,row,col,other_disassembled; 593 Scalar *values,val; 594 InsertMode addv = mat->insertmode; 595 596 /* wait on receives */ 597 while (count) { 598 MPI_Waitany(nrecvs,baij->recv_waits,&imdex,&recv_status); 599 /* unpack receives into our local space */ 600 values = baij->rvalues + 3*imdex*baij->rmax; 601 MPI_Get_count(&recv_status,MPIU_SCALAR,&n); 602 n = n/3; 603 for ( i=0; i<n; i++ ) { 604 row = (int) PetscReal(values[3*i]) - baij->rstart*bs; 605 col = (int) PetscReal(values[3*i+1]); 606 val = values[3*i+2]; 607 if (col >= baij->cstart*bs && col < baij->cend*bs) { 608 col -= baij->cstart*bs; 609 MatSetValues(baij->A,1,&row,1,&col,&val,addv); 610 } 611 else { 612 if (mat->was_assembled) { 613 if (!baij->colmap) { 614 ierr = CreateColmap_MPIBAIJ_Private(mat); CHKERRQ(ierr); 615 } 616 col = (baij->colmap[col/bs]-1)*bs + col%bs; 617 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 618 ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 619 col = (int) PetscReal(values[3*i+1]); 620 } 621 } 622 MatSetValues(baij->B,1,&row,1,&col,&val,addv); 623 } 624 } 625 count--; 626 } 627 PetscFree(baij->recv_waits); PetscFree(baij->rvalues); 628 629 /* wait on sends */ 630 if (baij->nsends) { 631 send_status = (MPI_Status *) PetscMalloc(baij->nsends*sizeof(MPI_Status)); 632 CHKPTRQ(send_status); 633 MPI_Waitall(baij->nsends,baij->send_waits,send_status); 634 PetscFree(send_status); 635 } 636 PetscFree(baij->send_waits); PetscFree(baij->svalues); 637 638 ierr = MatAssemblyBegin(baij->A,mode); CHKERRQ(ierr); 639 ierr = MatAssemblyEnd(baij->A,mode); CHKERRQ(ierr); 640 641 /* determine if any processor has disassembled, if so we must 642 also disassemble ourselfs, in order that we may reassemble. */ 643 MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm); 644 if (mat->was_assembled && !other_disassembled) { 645 ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 646 } 647 648 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 649 ierr = MatSetUpMultiply_MPIBAIJ(mat); CHKERRQ(ierr); 650 } 651 ierr = MatAssemblyBegin(baij->B,mode); CHKERRQ(ierr); 652 ierr = MatAssemblyEnd(baij->B,mode); CHKERRQ(ierr); 653 654 if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;} 655 return 0; 656 } 657 658 #undef __FUNC__ 659 #define __FUNC__ "MatView_MPIBAIJ_Binary" 660 static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer) 661 { 662 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 663 int ierr; 664 665 if (baij->size == 1) { 666 ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 667 } 668 else SETERRQ(1,0,"Only uniprocessor output supported"); 669 return 0; 670 } 671 672 #undef __FUNC__ 673 #define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworMatlab" 674 static int MatView_MPIBAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer) 675 { 676 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 677 int ierr, format,rank,bs = baij->bs; 678 FILE *fd; 679 ViewerType vtype; 680 681 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 682 if (vtype == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) { 683 ierr = ViewerGetFormat(viewer,&format); 684 if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 685 MatInfo info; 686 MPI_Comm_rank(mat->comm,&rank); 687 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 688 ierr = MatGetInfo(mat,MAT_LOCAL,&info); 689 PetscSequentialPhaseBegin(mat->comm,1); 690 fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n", 691 rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs, 692 baij->bs,(int)info.memory); 693 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info); 694 fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs); 695 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info); 696 fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs); 697 fflush(fd); 698 PetscSequentialPhaseEnd(mat->comm,1); 699 ierr = VecScatterView(baij->Mvctx,viewer); CHKERRQ(ierr); 700 return 0; 701 } 702 else if (format == VIEWER_FORMAT_ASCII_INFO) { 703 PetscPrintf(mat->comm," block size is %d\n",bs); 704 return 0; 705 } 706 } 707 708 if (vtype == DRAW_VIEWER) { 709 Draw draw; 710 PetscTruth isnull; 711 ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr); 712 ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0; 713 } 714 715 if (vtype == ASCII_FILE_VIEWER) { 716 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 717 PetscSequentialPhaseBegin(mat->comm,1); 718 fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 719 baij->rank,baij->m,baij->rstart*bs,baij->rend*bs,baij->n, 720 baij->cstart*bs,baij->cend*bs); 721 ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 722 ierr = MatView(baij->B,viewer); CHKERRQ(ierr); 723 fflush(fd); 724 PetscSequentialPhaseEnd(mat->comm,1); 725 } 726 else { 727 int size = baij->size; 728 rank = baij->rank; 729 if (size == 1) { 730 ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 731 } 732 else { 733 /* assemble the entire matrix onto first processor. */ 734 Mat A; 735 Mat_SeqBAIJ *Aloc; 736 int M = baij->M, N = baij->N,*ai,*aj,row,col,i,j,k,*rvals; 737 int mbs=baij->mbs; 738 Scalar *a; 739 740 if (!rank) { 741 ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 742 CHKERRQ(ierr); 743 } 744 else { 745 ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 746 CHKERRQ(ierr); 747 } 748 PLogObjectParent(mat,A); 749 750 /* copy over the A part */ 751 Aloc = (Mat_SeqBAIJ*) baij->A->data; 752 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 753 row = baij->rstart; 754 rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals); 755 756 for ( i=0; i<mbs; i++ ) { 757 rvals[0] = bs*(baij->rstart + i); 758 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 759 for ( j=ai[i]; j<ai[i+1]; j++ ) { 760 col = (baij->cstart+aj[j])*bs; 761 for (k=0; k<bs; k++ ) { 762 ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 763 col++; a += bs; 764 } 765 } 766 } 767 /* copy over the B part */ 768 Aloc = (Mat_SeqBAIJ*) baij->B->data; 769 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 770 row = baij->rstart*bs; 771 for ( i=0; i<mbs; i++ ) { 772 rvals[0] = bs*(baij->rstart + i); 773 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 774 for ( j=ai[i]; j<ai[i+1]; j++ ) { 775 col = baij->garray[aj[j]]*bs; 776 for (k=0; k<bs; k++ ) { 777 ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 778 col++; a += bs; 779 } 780 } 781 } 782 PetscFree(rvals); 783 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 784 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 785 if (!rank) { 786 ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 787 } 788 ierr = MatDestroy(A); CHKERRQ(ierr); 789 } 790 } 791 return 0; 792 } 793 794 795 796 #undef __FUNC__ 797 #define __FUNC__ "MatView_MPIBAIJ" 798 int MatView_MPIBAIJ(PetscObject obj,Viewer viewer) 799 { 800 Mat mat = (Mat) obj; 801 int ierr; 802 ViewerType vtype; 803 804 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 805 if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER || 806 vtype == DRAW_VIEWER || vtype == MATLAB_VIEWER) { 807 ierr = MatView_MPIBAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 808 } 809 else if (vtype == BINARY_FILE_VIEWER) { 810 return MatView_MPIBAIJ_Binary(mat,viewer); 811 } 812 return 0; 813 } 814 815 #undef __FUNC__ 816 #define __FUNC__ "MatDestroy_MPIBAIJ" 817 int MatDestroy_MPIBAIJ(PetscObject obj) 818 { 819 Mat mat = (Mat) obj; 820 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 821 int ierr; 822 823 #if defined(PETSC_LOG) 824 PLogObjectState(obj,"Rows=%d, Cols=%d",baij->M,baij->N); 825 #endif 826 827 PetscFree(baij->rowners); 828 ierr = MatDestroy(baij->A); CHKERRQ(ierr); 829 ierr = MatDestroy(baij->B); CHKERRQ(ierr); 830 if (baij->colmap) PetscFree(baij->colmap); 831 if (baij->garray) PetscFree(baij->garray); 832 if (baij->lvec) VecDestroy(baij->lvec); 833 if (baij->Mvctx) VecScatterDestroy(baij->Mvctx); 834 if (baij->rowvalues) PetscFree(baij->rowvalues); 835 if (baij->barray) PetscFree(baij->barray); 836 PetscFree(baij); 837 if (mat->mapping) { 838 ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr); 839 } 840 PLogObjectDestroy(mat); 841 PetscHeaderDestroy(mat); 842 return 0; 843 } 844 845 #undef __FUNC__ 846 #define __FUNC__ "MatMult_MPIBAIJ" 847 int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 848 { 849 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 850 int ierr, nt; 851 852 VecGetLocalSize_Fast(xx,nt); 853 if (nt != a->n) { 854 SETERRQ(1,0,"Incompatible partition of A and xx"); 855 } 856 VecGetLocalSize_Fast(yy,nt); 857 if (nt != a->m) { 858 SETERRQ(1,0,"Incompatible parition of A and yy"); 859 } 860 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 861 ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr); 862 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 863 ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 864 ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 865 return 0; 866 } 867 868 #undef __FUNC__ 869 #define __FUNC__ "MatMultAdd_MPIBAIJ" 870 int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 871 { 872 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 873 int ierr; 874 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 875 ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 876 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 877 ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 878 return 0; 879 } 880 881 #undef __FUNC__ 882 #define __FUNC__ "MatMultTrans_MPIBAIJ" 883 int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy) 884 { 885 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 886 int ierr; 887 888 /* do nondiagonal part */ 889 ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 890 /* send it on its way */ 891 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 892 /* do local part */ 893 ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr); 894 /* receive remote parts: note this assumes the values are not actually */ 895 /* inserted in yy until the next line, which is true for my implementation*/ 896 /* but is not perhaps always true. */ 897 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 898 return 0; 899 } 900 901 #undef __FUNC__ 902 #define __FUNC__ "MatMultTransAdd_MPIBAIJ" 903 int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 904 { 905 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 906 int ierr; 907 908 /* do nondiagonal part */ 909 ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 910 /* send it on its way */ 911 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 912 /* do local part */ 913 ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 914 /* receive remote parts: note this assumes the values are not actually */ 915 /* inserted in yy until the next line, which is true for my implementation*/ 916 /* but is not perhaps always true. */ 917 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 918 return 0; 919 } 920 921 /* 922 This only works correctly for square matrices where the subblock A->A is the 923 diagonal block 924 */ 925 #undef __FUNC__ 926 #define __FUNC__ "MatGetDiagonal_MPIBAIJ" 927 int MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 928 { 929 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 930 if (a->M != a->N) 931 SETERRQ(1,0,"Supports only square matrix where A->A is diag block"); 932 return MatGetDiagonal(a->A,v); 933 } 934 935 #undef __FUNC__ 936 #define __FUNC__ "MatScale_MPIBAIJ" 937 int MatScale_MPIBAIJ(Scalar *aa,Mat A) 938 { 939 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 940 int ierr; 941 ierr = MatScale(aa,a->A); CHKERRQ(ierr); 942 ierr = MatScale(aa,a->B); CHKERRQ(ierr); 943 return 0; 944 } 945 946 #undef __FUNC__ 947 #define __FUNC__ "MatGetSize_MPIBAIJ" 948 int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n) 949 { 950 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 951 *m = mat->M; *n = mat->N; 952 return 0; 953 } 954 955 #undef __FUNC__ 956 #define __FUNC__ "MatGetLocalSize_MPIBAIJ" 957 int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n) 958 { 959 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 960 *m = mat->m; *n = mat->N; 961 return 0; 962 } 963 964 #undef __FUNC__ 965 #define __FUNC__ "MatGetOwnershipRange_MPIBAIJ" 966 int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n) 967 { 968 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 969 *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs; 970 return 0; 971 } 972 973 extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 974 extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 975 976 #undef __FUNC__ 977 #define __FUNC__ "MatGetRow_MPIBAIJ" 978 int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 979 { 980 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 981 Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 982 int bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB; 983 int nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs; 984 int *cmap, *idx_p,cstart = mat->cstart; 985 986 if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,0,"Already active"); 987 mat->getrowactive = PETSC_TRUE; 988 989 if (!mat->rowvalues && (idx || v)) { 990 /* 991 allocate enough space to hold information from the longest row. 992 */ 993 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data; 994 int max = 1,mbs = mat->mbs,tmp; 995 for ( i=0; i<mbs; i++ ) { 996 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 997 if (max < tmp) { max = tmp; } 998 } 999 mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar))); 1000 CHKPTRQ(mat->rowvalues); 1001 mat->rowindices = (int *) (mat->rowvalues + max*bs2); 1002 } 1003 1004 1005 if (row < brstart || row >= brend) SETERRQ(1,0,"Only local rows") 1006 lrow = row - brstart; 1007 1008 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1009 if (!v) {pvA = 0; pvB = 0;} 1010 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1011 ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1012 ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1013 nztot = nzA + nzB; 1014 1015 cmap = mat->garray; 1016 if (v || idx) { 1017 if (nztot) { 1018 /* Sort by increasing column numbers, assuming A and B already sorted */ 1019 int imark = -1; 1020 if (v) { 1021 *v = v_p = mat->rowvalues; 1022 for ( i=0; i<nzB; i++ ) { 1023 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1024 else break; 1025 } 1026 imark = i; 1027 for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i]; 1028 for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = vworkB[i]; 1029 } 1030 if (idx) { 1031 *idx = idx_p = mat->rowindices; 1032 if (imark > -1) { 1033 for ( i=0; i<imark; i++ ) { 1034 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1035 } 1036 } else { 1037 for ( i=0; i<nzB; i++ ) { 1038 if (cmap[cworkB[i]/bs] < cstart) 1039 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1040 else break; 1041 } 1042 imark = i; 1043 } 1044 for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart*bs + cworkA[i]; 1045 for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1046 } 1047 } 1048 else { 1049 if (idx) *idx = 0; 1050 if (v) *v = 0; 1051 } 1052 } 1053 *nz = nztot; 1054 ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1055 ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1056 return 0; 1057 } 1058 1059 #undef __FUNC__ 1060 #define __FUNC__ "MatRestoreRow_MPIBAIJ" 1061 int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1062 { 1063 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1064 if (baij->getrowactive == PETSC_FALSE) { 1065 SETERRQ(1,0,"MatGetRow not called"); 1066 } 1067 baij->getrowactive = PETSC_FALSE; 1068 return 0; 1069 } 1070 1071 #undef __FUNC__ 1072 #define __FUNC__ "MatGetBlockSize_MPIBAIJ" 1073 int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs) 1074 { 1075 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1076 *bs = baij->bs; 1077 return 0; 1078 } 1079 1080 #undef __FUNC__ 1081 #define __FUNC__ "MatZeroEntries_MPIBAIJ" 1082 int MatZeroEntries_MPIBAIJ(Mat A) 1083 { 1084 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 1085 int ierr; 1086 ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 1087 ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 1088 return 0; 1089 } 1090 1091 #undef __FUNC__ 1092 #define __FUNC__ "MatGetInfo_MPIBAIJ" 1093 int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1094 { 1095 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) matin->data; 1096 Mat A = a->A, B = a->B; 1097 int ierr; 1098 double isend[5], irecv[5]; 1099 1100 info->rows_global = (double)a->M; 1101 info->columns_global = (double)a->N; 1102 info->rows_local = (double)a->m; 1103 info->columns_local = (double)a->N; 1104 info->block_size = (double)a->bs; 1105 ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr); 1106 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->memory; 1107 ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr); 1108 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->memory; 1109 if (flag == MAT_LOCAL) { 1110 info->nz_used = isend[0]; 1111 info->nz_allocated = isend[1]; 1112 info->nz_unneeded = isend[2]; 1113 info->memory = isend[3]; 1114 info->mallocs = isend[4]; 1115 } else if (flag == MAT_GLOBAL_MAX) { 1116 MPI_Allreduce(isend,irecv,5,MPI_INT,MPI_MAX,matin->comm); 1117 info->nz_used = irecv[0]; 1118 info->nz_allocated = irecv[1]; 1119 info->nz_unneeded = irecv[2]; 1120 info->memory = irecv[3]; 1121 info->mallocs = irecv[4]; 1122 } else if (flag == MAT_GLOBAL_SUM) { 1123 MPI_Allreduce(isend,irecv,5,MPI_INT,MPI_SUM,matin->comm); 1124 info->nz_used = irecv[0]; 1125 info->nz_allocated = irecv[1]; 1126 info->nz_unneeded = irecv[2]; 1127 info->memory = irecv[3]; 1128 info->mallocs = irecv[4]; 1129 } 1130 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1131 info->fill_ratio_needed = 0; 1132 info->factor_mallocs = 0; 1133 return 0; 1134 } 1135 1136 #undef __FUNC__ 1137 #define __FUNC__ "MatSetOption_MPIBAIJ" 1138 int MatSetOption_MPIBAIJ(Mat A,MatOption op) 1139 { 1140 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1141 1142 if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 1143 op == MAT_YES_NEW_NONZERO_LOCATIONS || 1144 op == MAT_COLUMNS_UNSORTED || 1145 op == MAT_COLUMNS_SORTED || 1146 op == MAT_NEW_NONZERO_ALLOCATION_ERROR || 1147 op == MAT_NEW_NONZERO_LOCATION_ERROR) { 1148 MatSetOption(a->A,op); 1149 MatSetOption(a->B,op); 1150 } else if (op == MAT_ROW_ORIENTED) { 1151 a->roworiented = 1; 1152 MatSetOption(a->A,op); 1153 MatSetOption(a->B,op); 1154 } else if (op == MAT_ROWS_SORTED || 1155 op == MAT_ROWS_UNSORTED || 1156 op == MAT_SYMMETRIC || 1157 op == MAT_STRUCTURALLY_SYMMETRIC || 1158 op == MAT_YES_NEW_DIAGONALS) 1159 PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 1160 else if (op == MAT_COLUMN_ORIENTED) { 1161 a->roworiented = 0; 1162 MatSetOption(a->A,op); 1163 MatSetOption(a->B,op); 1164 } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 1165 a->donotstash = 1; 1166 } else if (op == MAT_NO_NEW_DIAGONALS) 1167 {SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");} 1168 else 1169 {SETERRQ(PETSC_ERR_SUP,0,"unknown option");} 1170 return 0; 1171 } 1172 1173 #undef __FUNC__ 1174 #define __FUNC__ "MatTranspose_MPIBAIJ(" 1175 int MatTranspose_MPIBAIJ(Mat A,Mat *matout) 1176 { 1177 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data; 1178 Mat_SeqBAIJ *Aloc; 1179 Mat B; 1180 int ierr,M=baij->M,N=baij->N,*ai,*aj,row,i,*rvals,j,k,col; 1181 int bs=baij->bs,mbs=baij->mbs; 1182 Scalar *a; 1183 1184 if (matout == PETSC_NULL && M != N) 1185 SETERRQ(1,0,"Square matrix only for in-place"); 1186 ierr = MatCreateMPIBAIJ(A->comm,baij->bs,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,PETSC_NULL,&B); 1187 CHKERRQ(ierr); 1188 1189 /* copy over the A part */ 1190 Aloc = (Mat_SeqBAIJ*) baij->A->data; 1191 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1192 row = baij->rstart; 1193 rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals); 1194 1195 for ( i=0; i<mbs; i++ ) { 1196 rvals[0] = bs*(baij->rstart + i); 1197 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 1198 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1199 col = (baij->cstart+aj[j])*bs; 1200 for (k=0; k<bs; k++ ) { 1201 ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1202 col++; a += bs; 1203 } 1204 } 1205 } 1206 /* copy over the B part */ 1207 Aloc = (Mat_SeqBAIJ*) baij->B->data; 1208 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1209 row = baij->rstart*bs; 1210 for ( i=0; i<mbs; i++ ) { 1211 rvals[0] = bs*(baij->rstart + i); 1212 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 1213 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1214 col = baij->garray[aj[j]]*bs; 1215 for (k=0; k<bs; k++ ) { 1216 ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1217 col++; a += bs; 1218 } 1219 } 1220 } 1221 PetscFree(rvals); 1222 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1223 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1224 1225 if (matout != PETSC_NULL) { 1226 *matout = B; 1227 } else { 1228 /* This isn't really an in-place transpose .... but free data structures from baij */ 1229 PetscFree(baij->rowners); 1230 ierr = MatDestroy(baij->A); CHKERRQ(ierr); 1231 ierr = MatDestroy(baij->B); CHKERRQ(ierr); 1232 if (baij->colmap) PetscFree(baij->colmap); 1233 if (baij->garray) PetscFree(baij->garray); 1234 if (baij->lvec) VecDestroy(baij->lvec); 1235 if (baij->Mvctx) VecScatterDestroy(baij->Mvctx); 1236 PetscFree(baij); 1237 PetscMemcpy(A,B,sizeof(struct _Mat)); 1238 PetscHeaderDestroy(B); 1239 } 1240 return 0; 1241 } 1242 1243 #undef __FUNC__ 1244 #define __FUNC__ "MatDiagonalScale_MPIBAIJ" 1245 int MatDiagonalScale_MPIBAIJ(Mat A,Vec ll,Vec rr) 1246 { 1247 Mat a = ((Mat_MPIBAIJ *) A->data)->A; 1248 Mat b = ((Mat_MPIBAIJ *) A->data)->B; 1249 int ierr,s1,s2,s3; 1250 1251 if (ll) { 1252 ierr = VecGetLocalSize(ll,&s1); CHKERRQ(ierr); 1253 ierr = MatGetLocalSize(A,&s2,&s3); CHKERRQ(ierr); 1254 if (s1!=s2) SETERRQ(1,0,"non-conforming local sizes"); 1255 ierr = MatDiagonalScale(a,ll,0); CHKERRQ(ierr); 1256 ierr = MatDiagonalScale(b,ll,0); CHKERRQ(ierr); 1257 } 1258 if (rr) SETERRQ(1,0,"not supported for right vector"); 1259 return 0; 1260 } 1261 1262 /* the code does not do the diagonal entries correctly unless the 1263 matrix is square and the column and row owerships are identical. 1264 This is a BUG. The only way to fix it seems to be to access 1265 baij->A and baij->B directly and not through the MatZeroRows() 1266 routine. 1267 */ 1268 #undef __FUNC__ 1269 #define __FUNC__ "MatZeroRows_MPIBAIJ" 1270 int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag) 1271 { 1272 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 1273 int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 1274 int *procs,*nprocs,j,found,idx,nsends,*work; 1275 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 1276 int *rvalues,tag = A->tag,count,base,slen,n,*source; 1277 int *lens,imdex,*lrows,*values,bs=l->bs; 1278 MPI_Comm comm = A->comm; 1279 MPI_Request *send_waits,*recv_waits; 1280 MPI_Status recv_status,*send_status; 1281 IS istmp; 1282 1283 ierr = ISGetSize(is,&N); CHKERRQ(ierr); 1284 ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 1285 1286 /* first count number of contributors to each processor */ 1287 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 1288 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 1289 owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 1290 for ( i=0; i<N; i++ ) { 1291 idx = rows[i]; 1292 found = 0; 1293 for ( j=0; j<size; j++ ) { 1294 if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 1295 nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 1296 } 1297 } 1298 if (!found) SETERRQ(1,0,"Index out of range"); 1299 } 1300 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 1301 1302 /* inform other processors of number of messages and max length*/ 1303 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 1304 MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm); 1305 nrecvs = work[rank]; 1306 MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 1307 nmax = work[rank]; 1308 PetscFree(work); 1309 1310 /* post receives: */ 1311 rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */ 1312 CHKPTRQ(rvalues); 1313 recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request)); 1314 CHKPTRQ(recv_waits); 1315 for ( i=0; i<nrecvs; i++ ) { 1316 MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i); 1317 } 1318 1319 /* do sends: 1320 1) starts[i] gives the starting index in svalues for stuff going to 1321 the ith processor 1322 */ 1323 svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 1324 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 1325 CHKPTRQ(send_waits); 1326 starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 1327 starts[0] = 0; 1328 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 1329 for ( i=0; i<N; i++ ) { 1330 svalues[starts[owner[i]]++] = rows[i]; 1331 } 1332 ISRestoreIndices(is,&rows); 1333 1334 starts[0] = 0; 1335 for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 1336 count = 0; 1337 for ( i=0; i<size; i++ ) { 1338 if (procs[i]) { 1339 MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++); 1340 } 1341 } 1342 PetscFree(starts); 1343 1344 base = owners[rank]*bs; 1345 1346 /* wait on receives */ 1347 lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 1348 source = lens + nrecvs; 1349 count = nrecvs; slen = 0; 1350 while (count) { 1351 MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status); 1352 /* unpack receives into our local space */ 1353 MPI_Get_count(&recv_status,MPI_INT,&n); 1354 source[imdex] = recv_status.MPI_SOURCE; 1355 lens[imdex] = n; 1356 slen += n; 1357 count--; 1358 } 1359 PetscFree(recv_waits); 1360 1361 /* move the data into the send scatter */ 1362 lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 1363 count = 0; 1364 for ( i=0; i<nrecvs; i++ ) { 1365 values = rvalues + i*nmax; 1366 for ( j=0; j<lens[i]; j++ ) { 1367 lrows[count++] = values[j] - base; 1368 } 1369 } 1370 PetscFree(rvalues); PetscFree(lens); 1371 PetscFree(owner); PetscFree(nprocs); 1372 1373 /* actually zap the local rows */ 1374 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 1375 PLogObjectParent(A,istmp); 1376 PetscFree(lrows); 1377 ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 1378 ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 1379 ierr = ISDestroy(istmp); CHKERRQ(ierr); 1380 1381 /* wait on sends */ 1382 if (nsends) { 1383 send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status)); 1384 CHKPTRQ(send_status); 1385 MPI_Waitall(nsends,send_waits,send_status); 1386 PetscFree(send_status); 1387 } 1388 PetscFree(send_waits); PetscFree(svalues); 1389 1390 return 0; 1391 } 1392 extern int MatPrintHelp_SeqBAIJ(Mat); 1393 #undef __FUNC__ 1394 #define __FUNC__ "MatPrintHelp_MPIBAIJ" 1395 int MatPrintHelp_MPIBAIJ(Mat A) 1396 { 1397 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 1398 1399 if (!a->rank) return MatPrintHelp_SeqBAIJ(a->A); 1400 else return 0; 1401 } 1402 1403 #undef __FUNC__ 1404 #define __FUNC__ "MatSetUnfactored_MPIBAIJ" 1405 int MatSetUnfactored_MPIBAIJ(Mat A) 1406 { 1407 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 1408 int ierr; 1409 ierr = MatSetUnfactored(a->A); CHKERRQ(ierr); 1410 return 0; 1411 } 1412 1413 static int MatConvertSameType_MPIBAIJ(Mat,Mat *,int); 1414 1415 /* -------------------------------------------------------------------*/ 1416 static struct _MatOps MatOps = { 1417 MatSetValues_MPIBAIJ,MatGetRow_MPIBAIJ,MatRestoreRow_MPIBAIJ,MatMult_MPIBAIJ, 1418 MatMultAdd_MPIBAIJ,MatMultTrans_MPIBAIJ,MatMultTransAdd_MPIBAIJ,0, 1419 0,0,0,0, 1420 0,0,MatTranspose_MPIBAIJ,MatGetInfo_MPIBAIJ, 1421 0,MatGetDiagonal_MPIBAIJ,MatDiagonalScale_MPIBAIJ,MatNorm_MPIBAIJ, 1422 MatAssemblyBegin_MPIBAIJ,MatAssemblyEnd_MPIBAIJ,0,MatSetOption_MPIBAIJ, 1423 MatZeroEntries_MPIBAIJ,MatZeroRows_MPIBAIJ,0, 1424 0,0,0,MatGetSize_MPIBAIJ, 1425 MatGetLocalSize_MPIBAIJ,MatGetOwnershipRange_MPIBAIJ,0,0, 1426 0,0,MatConvertSameType_MPIBAIJ,0,0, 1427 0,0,0,MatGetSubMatrices_MPIBAIJ, 1428 MatIncreaseOverlap_MPIBAIJ,MatGetValues_MPIBAIJ,0,MatPrintHelp_MPIBAIJ, 1429 MatScale_MPIBAIJ,0,0,0,MatGetBlockSize_MPIBAIJ, 1430 0,0,0,0,0,0,MatSetUnfactored_MPIBAIJ,0,MatSetValuesBlocked_MPIBAIJ}; 1431 1432 1433 #undef __FUNC__ 1434 #define __FUNC__ "MatCreateMPIBAIJ" 1435 /*@C 1436 MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 1437 (block compressed row). For good matrix assembly performance 1438 the user should preallocate the matrix storage by setting the parameters 1439 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1440 performance can be increased by more than a factor of 50. 1441 1442 Input Parameters: 1443 . comm - MPI communicator 1444 . bs - size of blockk 1445 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1446 This value should be the same as the local size used in creating the 1447 y vector for the matrix-vector product y = Ax. 1448 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1449 This value should be the same as the local size used in creating the 1450 x vector for the matrix-vector product y = Ax. 1451 . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 1452 . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 1453 . d_nz - number of block nonzeros per block row in diagonal portion of local 1454 submatrix (same for all local rows) 1455 . d_nzz - array containing the number of block nonzeros in the various block rows 1456 of the in diagonal portion of the local (possibly different for each block 1457 row) or PETSC_NULL. You must leave room for the diagonal entry even if 1458 it is zero. 1459 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1460 submatrix (same for all local rows). 1461 . o_nzz - array containing the number of nonzeros in the various block rows of the 1462 off-diagonal portion of the local submatrix (possibly different for 1463 each block row) or PETSC_NULL. 1464 1465 Output Parameter: 1466 . A - the matrix 1467 1468 Notes: 1469 The user MUST specify either the local or global matrix dimensions 1470 (possibly both). 1471 1472 Storage Information: 1473 For a square global matrix we define each processor's diagonal portion 1474 to be its local rows and the corresponding columns (a square submatrix); 1475 each processor's off-diagonal portion encompasses the remainder of the 1476 local matrix (a rectangular submatrix). 1477 1478 The user can specify preallocated storage for the diagonal part of 1479 the local submatrix with either d_nz or d_nnz (not both). Set 1480 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1481 memory allocation. Likewise, specify preallocated storage for the 1482 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1483 1484 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1485 the figure below we depict these three local rows and all columns (0-11). 1486 1487 $ 0 1 2 3 4 5 6 7 8 9 10 11 1488 $ ------------------- 1489 $ row 3 | o o o d d d o o o o o o 1490 $ row 4 | o o o d d d o o o o o o 1491 $ row 5 | o o o d d d o o o o o o 1492 $ ------------------- 1493 $ 1494 1495 Thus, any entries in the d locations are stored in the d (diagonal) 1496 submatrix, and any entries in the o locations are stored in the 1497 o (off-diagonal) submatrix. Note that the d and the o submatrices are 1498 stored simply in the MATSEQBAIJ format for compressed row storage. 1499 1500 Now d_nz should indicate the number of nonzeros per row in the d matrix, 1501 and o_nz should indicate the number of nonzeros per row in the o matrix. 1502 In general, for PDE problems in which most nonzeros are near the diagonal, 1503 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1504 or you will get TERRIBLE performance; see the users' manual chapter on 1505 matrices. 1506 1507 .keywords: matrix, block, aij, compressed row, sparse, parallel 1508 1509 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues() 1510 @*/ 1511 int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N, 1512 int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 1513 { 1514 Mat B; 1515 Mat_MPIBAIJ *b; 1516 int ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size; 1517 1518 if (bs < 1) SETERRQ(1,0,"invalid block size specified"); 1519 *A = 0; 1520 PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATMPIBAIJ,comm); 1521 PLogObjectCreate(B); 1522 B->data = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b); 1523 PetscMemzero(b,sizeof(Mat_MPIBAIJ)); 1524 PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 1525 MPI_Comm_size(comm,&size); 1526 if (size == 1) { 1527 B->ops.getrowij = MatGetRowIJ_MPIBAIJ; 1528 B->ops.restorerowij = MatRestoreRowIJ_MPIBAIJ; 1529 B->ops.lufactorsymbolic = MatLUFactorSymbolic_MPIBAIJ; 1530 B->ops.lufactornumeric = MatLUFactorNumeric_MPIBAIJ; 1531 B->ops.lufactor = MatLUFactor_MPIBAIJ; 1532 B->ops.solve = MatSolve_MPIBAIJ; 1533 B->ops.solveadd = MatSolveAdd_MPIBAIJ; 1534 B->ops.solvetrans = MatSolveTrans_MPIBAIJ; 1535 B->ops.solvetransadd = MatSolveTransAdd_MPIBAIJ; 1536 B->ops.ilufactorsymbolic = MatILUFactorSymbolic_MPIBAIJ; 1537 } 1538 1539 B->destroy = MatDestroy_MPIBAIJ; 1540 B->view = MatView_MPIBAIJ; 1541 B->mapping = 0; 1542 B->factor = 0; 1543 B->assembled = PETSC_FALSE; 1544 1545 B->insertmode = NOT_SET_VALUES; 1546 MPI_Comm_rank(comm,&b->rank); 1547 MPI_Comm_size(comm,&b->size); 1548 1549 if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) 1550 SETERRQ(1,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 1551 if ( M == PETSC_DECIDE && m == PETSC_DECIDE) SETERRQ(1,0,"either M or m should be specified"); 1552 if ( M == PETSC_DECIDE && n == PETSC_DECIDE)SETERRQ(1,0,"either N or n should be specified"); 1553 if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE; 1554 if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE; 1555 1556 if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 1557 work[0] = m; work[1] = n; 1558 mbs = m/bs; nbs = n/bs; 1559 MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm ); 1560 if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;} 1561 if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;} 1562 } 1563 if (m == PETSC_DECIDE) { 1564 Mbs = M/bs; 1565 if (Mbs*bs != M) SETERRQ(1,0,"No of global rows must be divisible by blocksize"); 1566 mbs = Mbs/b->size + ((Mbs % b->size) > b->rank); 1567 m = mbs*bs; 1568 } 1569 if (n == PETSC_DECIDE) { 1570 Nbs = N/bs; 1571 if (Nbs*bs != N) SETERRQ(1,0,"No of global cols must be divisible by blocksize"); 1572 nbs = Nbs/b->size + ((Nbs % b->size) > b->rank); 1573 n = nbs*bs; 1574 } 1575 if (mbs*bs != m || nbs*bs != n) SETERRQ(1,0,"No of local rows, cols must be divisible by blocksize"); 1576 1577 b->m = m; B->m = m; 1578 b->n = n; B->n = n; 1579 b->N = N; B->N = N; 1580 b->M = M; B->M = M; 1581 b->bs = bs; 1582 b->bs2 = bs*bs; 1583 b->mbs = mbs; 1584 b->nbs = nbs; 1585 b->Mbs = Mbs; 1586 b->Nbs = Nbs; 1587 1588 /* build local table of row and column ownerships */ 1589 b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners); 1590 PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIBAIJ)); 1591 b->cowners = b->rowners + b->size + 2; 1592 MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm); 1593 b->rowners[0] = 0; 1594 for ( i=2; i<=b->size; i++ ) { 1595 b->rowners[i] += b->rowners[i-1]; 1596 } 1597 b->rstart = b->rowners[b->rank]; 1598 b->rend = b->rowners[b->rank+1]; 1599 b->rstart_bs = b->rstart * bs; 1600 b->rend_bs = b->rend * bs; 1601 1602 MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm); 1603 b->cowners[0] = 0; 1604 for ( i=2; i<=b->size; i++ ) { 1605 b->cowners[i] += b->cowners[i-1]; 1606 } 1607 b->cstart = b->cowners[b->rank]; 1608 b->cend = b->cowners[b->rank+1]; 1609 b->cstart_bs = b->cstart * bs; 1610 b->cend_bs = b->cend * bs; 1611 1612 if (d_nz == PETSC_DEFAULT) d_nz = 5; 1613 ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr); 1614 PLogObjectParent(B,b->A); 1615 if (o_nz == PETSC_DEFAULT) o_nz = 0; 1616 ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr); 1617 PLogObjectParent(B,b->B); 1618 1619 /* build cache for off array entries formed */ 1620 ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr); 1621 b->donotstash = 0; 1622 b->colmap = 0; 1623 b->garray = 0; 1624 b->roworiented = 1; 1625 1626 /* stuff used in block assembly */ 1627 b->barray = 0; 1628 1629 /* stuff used for matrix vector multiply */ 1630 b->lvec = 0; 1631 b->Mvctx = 0; 1632 1633 /* stuff for MatGetRow() */ 1634 b->rowindices = 0; 1635 b->rowvalues = 0; 1636 b->getrowactive = PETSC_FALSE; 1637 1638 *A = B; 1639 return 0; 1640 } 1641 1642 #undef __FUNC__ 1643 #define __FUNC__ "MatConvertSameType_MPIBAIJ" 1644 static int MatConvertSameType_MPIBAIJ(Mat matin,Mat *newmat,int cpvalues) 1645 { 1646 Mat mat; 1647 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data; 1648 int ierr, len=0, flg; 1649 1650 *newmat = 0; 1651 PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIBAIJ,matin->comm); 1652 PLogObjectCreate(mat); 1653 mat->data = (void *) (a = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(a); 1654 PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 1655 mat->destroy = MatDestroy_MPIBAIJ; 1656 mat->view = MatView_MPIBAIJ; 1657 mat->factor = matin->factor; 1658 mat->assembled = PETSC_TRUE; 1659 1660 a->m = mat->m = oldmat->m; 1661 a->n = mat->n = oldmat->n; 1662 a->M = mat->M = oldmat->M; 1663 a->N = mat->N = oldmat->N; 1664 1665 a->bs = oldmat->bs; 1666 a->bs2 = oldmat->bs2; 1667 a->mbs = oldmat->mbs; 1668 a->nbs = oldmat->nbs; 1669 a->Mbs = oldmat->Mbs; 1670 a->Nbs = oldmat->Nbs; 1671 1672 a->rstart = oldmat->rstart; 1673 a->rend = oldmat->rend; 1674 a->cstart = oldmat->cstart; 1675 a->cend = oldmat->cend; 1676 a->size = oldmat->size; 1677 a->rank = oldmat->rank; 1678 mat->insertmode = NOT_SET_VALUES; 1679 a->rowvalues = 0; 1680 a->getrowactive = PETSC_FALSE; 1681 a->barray = 0; 1682 1683 a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 1684 PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIBAIJ)); 1685 a->cowners = a->rowners + a->size + 2; 1686 PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int)); 1687 ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr); 1688 if (oldmat->colmap) { 1689 a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap); 1690 PLogObjectMemory(mat,(a->Nbs)*sizeof(int)); 1691 PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int)); 1692 } else a->colmap = 0; 1693 if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) { 1694 a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray); 1695 PLogObjectMemory(mat,len*sizeof(int)); 1696 PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); 1697 } else a->garray = 0; 1698 1699 ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 1700 PLogObjectParent(mat,a->lvec); 1701 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 1702 PLogObjectParent(mat,a->Mvctx); 1703 ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr); 1704 PLogObjectParent(mat,a->A); 1705 ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr); 1706 PLogObjectParent(mat,a->B); 1707 ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr); 1708 if (flg) { 1709 ierr = MatPrintHelp(mat); CHKERRQ(ierr); 1710 } 1711 *newmat = mat; 1712 return 0; 1713 } 1714 1715 #include "sys.h" 1716 1717 #undef __FUNC__ 1718 #define __FUNC__ "MatLoad_MPIBAIJ" 1719 int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat) 1720 { 1721 Mat A; 1722 int i, nz, ierr, j,rstart, rend, fd; 1723 Scalar *vals,*buf; 1724 MPI_Comm comm = ((PetscObject)viewer)->comm; 1725 MPI_Status status; 1726 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 1727 int *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf; 1728 int flg,tag = ((PetscObject)viewer)->tag,bs=1,bs2,Mbs,mbs,extra_rows; 1729 int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 1730 int dcount,kmax,k,nzcount,tmp; 1731 1732 1733 ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr); 1734 bs2 = bs*bs; 1735 1736 MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 1737 if (!rank) { 1738 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 1739 ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr); 1740 if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not matrix object"); 1741 } 1742 1743 MPI_Bcast(header+1,3,MPI_INT,0,comm); 1744 M = header[1]; N = header[2]; 1745 1746 if (M != N) SETERRQ(1,0,"Can only do square matrices"); 1747 1748 /* 1749 This code adds extra rows to make sure the number of rows is 1750 divisible by the blocksize 1751 */ 1752 Mbs = M/bs; 1753 extra_rows = bs - M + bs*(Mbs); 1754 if (extra_rows == bs) extra_rows = 0; 1755 else Mbs++; 1756 if (extra_rows &&!rank) { 1757 PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n"); 1758 } 1759 1760 /* determine ownership of all rows */ 1761 mbs = Mbs/size + ((Mbs % size) > rank); 1762 m = mbs * bs; 1763 rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int)); CHKPTRQ(rowners); 1764 browners = rowners + size + 1; 1765 MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm); 1766 rowners[0] = 0; 1767 for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1]; 1768 for ( i=0; i<=size; i++ ) browners[i] = rowners[i]*bs; 1769 rstart = rowners[rank]; 1770 rend = rowners[rank+1]; 1771 1772 /* distribute row lengths to all processors */ 1773 locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) ); CHKPTRQ(locrowlens); 1774 if (!rank) { 1775 rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) ); CHKPTRQ(rowlengths); 1776 ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr); 1777 for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 1778 sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 1779 for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i]; 1780 MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm); 1781 PetscFree(sndcounts); 1782 } 1783 else { 1784 MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm); 1785 } 1786 1787 if (!rank) { 1788 /* calculate the number of nonzeros on each processor */ 1789 procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 1790 PetscMemzero(procsnz,size*sizeof(int)); 1791 for ( i=0; i<size; i++ ) { 1792 for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) { 1793 procsnz[i] += rowlengths[j]; 1794 } 1795 } 1796 PetscFree(rowlengths); 1797 1798 /* determine max buffer needed and allocate it */ 1799 maxnz = 0; 1800 for ( i=0; i<size; i++ ) { 1801 maxnz = PetscMax(maxnz,procsnz[i]); 1802 } 1803 cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 1804 1805 /* read in my part of the matrix column indices */ 1806 nz = procsnz[0]; 1807 ibuf = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf); 1808 mycols = ibuf; 1809 if (size == 1) nz -= extra_rows; 1810 ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr); 1811 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 1812 1813 /* read in every ones (except the last) and ship off */ 1814 for ( i=1; i<size-1; i++ ) { 1815 nz = procsnz[i]; 1816 ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr); 1817 MPI_Send(cols,nz,MPI_INT,i,tag,comm); 1818 } 1819 /* read in the stuff for the last proc */ 1820 if ( size != 1 ) { 1821 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 1822 ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr); 1823 for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i; 1824 MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm); 1825 } 1826 PetscFree(cols); 1827 } 1828 else { 1829 /* determine buffer space needed for message */ 1830 nz = 0; 1831 for ( i=0; i<m; i++ ) { 1832 nz += locrowlens[i]; 1833 } 1834 ibuf = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf); 1835 mycols = ibuf; 1836 /* receive message of column indices*/ 1837 MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status); 1838 MPI_Get_count(&status,MPI_INT,&maxnz); 1839 if (maxnz != nz) SETERRQ(1,0,"something is wrong with file"); 1840 } 1841 1842 /* loop over local rows, determining number of off diagonal entries */ 1843 dlens = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) ); CHKPTRQ(dlens); 1844 odlens = dlens + (rend-rstart); 1845 mask = (int *) PetscMalloc( 3*Mbs*sizeof(int) ); CHKPTRQ(mask); 1846 PetscMemzero(mask,3*Mbs*sizeof(int)); 1847 masked1 = mask + Mbs; 1848 masked2 = masked1 + Mbs; 1849 rowcount = 0; nzcount = 0; 1850 for ( i=0; i<mbs; i++ ) { 1851 dcount = 0; 1852 odcount = 0; 1853 for ( j=0; j<bs; j++ ) { 1854 kmax = locrowlens[rowcount]; 1855 for ( k=0; k<kmax; k++ ) { 1856 tmp = mycols[nzcount++]/bs; 1857 if (!mask[tmp]) { 1858 mask[tmp] = 1; 1859 if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp; 1860 else masked1[dcount++] = tmp; 1861 } 1862 } 1863 rowcount++; 1864 } 1865 1866 dlens[i] = dcount; 1867 odlens[i] = odcount; 1868 1869 /* zero out the mask elements we set */ 1870 for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0; 1871 for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0; 1872 } 1873 1874 /* create our matrix */ 1875 ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat); 1876 CHKERRQ(ierr); 1877 A = *newmat; 1878 MatSetOption(A,MAT_COLUMNS_SORTED); 1879 1880 if (!rank) { 1881 buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(buf); 1882 /* read in my part of the matrix numerical values */ 1883 nz = procsnz[0]; 1884 vals = buf; 1885 mycols = ibuf; 1886 if (size == 1) nz -= extra_rows; 1887 ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 1888 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 1889 1890 /* insert into matrix */ 1891 jj = rstart*bs; 1892 for ( i=0; i<m; i++ ) { 1893 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 1894 mycols += locrowlens[i]; 1895 vals += locrowlens[i]; 1896 jj++; 1897 } 1898 /* read in other processors (except the last one) and ship out */ 1899 for ( i=1; i<size-1; i++ ) { 1900 nz = procsnz[i]; 1901 vals = buf; 1902 ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 1903 MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm); 1904 } 1905 /* the last proc */ 1906 if ( size != 1 ){ 1907 nz = procsnz[i] - extra_rows; 1908 vals = buf; 1909 ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 1910 for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0; 1911 MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm); 1912 } 1913 PetscFree(procsnz); 1914 } 1915 else { 1916 /* receive numeric values */ 1917 buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(buf); 1918 1919 /* receive message of values*/ 1920 vals = buf; 1921 mycols = ibuf; 1922 MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status); 1923 MPI_Get_count(&status,MPIU_SCALAR,&maxnz); 1924 if (maxnz != nz) SETERRQ(1,0,"something is wrong with file"); 1925 1926 /* insert into matrix */ 1927 jj = rstart*bs; 1928 for ( i=0; i<m; i++ ) { 1929 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 1930 mycols += locrowlens[i]; 1931 vals += locrowlens[i]; 1932 jj++; 1933 } 1934 } 1935 PetscFree(locrowlens); 1936 PetscFree(buf); 1937 PetscFree(ibuf); 1938 PetscFree(rowners); 1939 PetscFree(dlens); 1940 PetscFree(mask); 1941 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1942 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1943 return 0; 1944 } 1945 1946 1947