1 #ifndef lint 2 static char vcid[] = "$Id: mpibaij.c,v 1.57 1997/03/13 16:30:14 curfman Exp balay $"; 3 #endif 4 5 #include "src/mat/impls/baij/mpi/mpibaij.h" 6 #include "src/vec/vecimpl.h" 7 8 #include "draw.h" 9 #include "pinclude/pviewer.h" 10 11 extern int MatSetUpMultiply_MPIBAIJ(Mat); 12 extern int DisAssemble_MPIBAIJ(Mat); 13 extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int); 14 extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatGetSubMatrixCall,Mat **); 15 extern int MatLUFactorSymbolic_MPIBAIJ(Mat,IS,IS,double,Mat *); 16 extern int MatLUFactorNumeric_MPIBAIJ(Mat,Mat *); 17 extern int MatLUFactor_MPIBAIJ(Mat,IS,IS,double); 18 extern int MatSolve_MPIBAIJ(Mat,Vec,Vec); 19 extern int MatSolveAdd_MPIBAIJ(Mat,Vec,Vec,Vec); 20 extern int MatSolveTrans_MPIBAIJ(Mat,Vec,Vec); 21 extern int MatSolveTransAdd_MPIBAIJ(Mat,Vec,Vec,Vec); 22 extern int MatILUFactorSymbolic_MPIBAIJ(Mat,IS,IS,double,int,Mat *); 23 24 25 /* 26 Local utility routine that creates a mapping from the global column 27 number to the local number in the off-diagonal part of the local 28 storage of the matrix. This is done in a non-scalable way since the 29 length of colmap equals the global matrix length. 30 */ 31 #undef __FUNC__ 32 #define __FUNC__ "CreateColmap_MPIBAIJ_Private" 33 static int CreateColmap_MPIBAIJ_Private(Mat mat) 34 { 35 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 36 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data; 37 int nbs = B->nbs,i,bs=B->bs; 38 39 baij->colmap = (int *) PetscMalloc(baij->Nbs*sizeof(int));CHKPTRQ(baij->colmap); 40 PLogObjectMemory(mat,baij->Nbs*sizeof(int)); 41 PetscMemzero(baij->colmap,baij->Nbs*sizeof(int)); 42 for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1; 43 return 0; 44 } 45 46 #undef __FUNC__ 47 #define __FUNC__ "MatGetRowIJ_MPIBAIJ" 48 static int MatGetRowIJ_MPIBAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja, 49 PetscTruth *done) 50 { 51 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *) mat->data; 52 int ierr; 53 if (aij->size == 1) { 54 ierr = MatGetRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr); 55 } else SETERRQ(1,0,"not supported in parallel"); 56 return 0; 57 } 58 59 #undef __FUNC__ 60 #define __FUNC__ "MatRestoreRowIJ_MPIBAIJ" 61 static int MatRestoreRowIJ_MPIBAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja, 62 PetscTruth *done) 63 { 64 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *) mat->data; 65 int ierr; 66 if (aij->size == 1) { 67 ierr = MatRestoreRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr); 68 } else SETERRQ(1,0,"not supported in parallel"); 69 return 0; 70 } 71 #define CHUNKSIZE 10 72 73 #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 74 { \ 75 \ 76 brow = row/bs; \ 77 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 78 rmax = imax[brow]; nrow = ailen[brow]; \ 79 bcol = col/bs; \ 80 ridx = row % bs; cidx = col % bs; \ 81 low = 0; high = nrow; \ 82 while (high-low > 3) { \ 83 t = (low+high)/2; \ 84 if (rp[t] > bcol) high = t; \ 85 else low = t; \ 86 } \ 87 for ( _i=low; _i<high; _i++ ) { \ 88 if (rp[_i] > bcol) break; \ 89 if (rp[_i] == bcol) { \ 90 bap = ap + bs2*_i + bs*cidx + ridx; \ 91 if (addv == ADD_VALUES) *bap += value; \ 92 else *bap = value; \ 93 goto _noinsert; \ 94 } \ 95 } \ 96 if (a->nonew) goto _noinsert; \ 97 if (nrow >= rmax) { \ 98 /* there is no extra room in row, therefore enlarge */ \ 99 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 100 Scalar *new_a; \ 101 \ 102 /* malloc new storage space */ \ 103 len = 
new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); \ 104 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 105 new_j = (int *) (new_a + bs2*new_nz); \ 106 new_i = new_j + new_nz; \ 107 \ 108 /* copy over old data into new slots */ \ 109 for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} \ 110 for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 111 PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); \ 112 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \ 113 PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, \ 114 len*sizeof(int)); \ 115 PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); \ 116 PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \ 117 PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \ 118 aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar)); \ 119 /* free up old matrix storage */ \ 120 PetscFree(a->a); \ 121 if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \ 122 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 123 a->singlemalloc = 1; \ 124 \ 125 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 126 rmax = imax[brow] = imax[brow] + CHUNKSIZE; \ 127 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \ 128 a->maxnz += bs2*CHUNKSIZE; \ 129 a->reallocs++; \ 130 a->nz++; \ 131 } \ 132 N = nrow++ - 1; \ 133 /* shift up all the later entries in this row */ \ 134 for ( ii=N; ii>=_i; ii-- ) { \ 135 rp[ii+1] = rp[ii]; \ 136 PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \ 137 } \ 138 if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar)); \ 139 rp[_i] = bcol; \ 140 ap[bs2*_i + bs*cidx + ridx] = value; \ 141 _noinsert:; \ 142 ailen[brow] = nrow; \ 143 } 144 145 extern int MatSetValues_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 146 #undef __FUNC__ 147 #define __FUNC__ "MatSetValues_MPIBAIJ" 148 int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 149 { 150 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 151 Scalar value; 152 int ierr,i,j,row,col; 153 int roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ; 154 int rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs; 155 int cend_orig=baij->cend_bs,bs=baij->bs; 156 157 /* Some Variables required in the macro */ 158 Mat A = baij->A; 159 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) (A)->data; 160 int *rp,ii,nrow,_i,rmax,N; 161 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 162 int *aj=a->j,brow,bcol; 163 int low,high,t,ridx,cidx,bs2=a->bs2; 164 Scalar *ap,*aa=a->a,*bap; 165 166 for ( i=0; i<m; i++ ) { 167 #if defined(PETSC_BOPT_g) 168 if (im[i] < 0) SETERRQ(1,0,"Negative row"); 169 if (im[i] >= baij->M) SETERRQ(1,0,"Row too large"); 170 #endif 171 if (im[i] >= rstart_orig && im[i] < rend_orig) { 172 row = im[i] - rstart_orig; 173 for ( j=0; j<n; j++ ) { 174 if (in[j] >= cstart_orig && in[j] < cend_orig){ 175 col = in[j] - cstart_orig; 176 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 177 MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 178 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 179 } 180 #if defined(PETSC_BOPT_g) 181 else if (in[j] < 0) {SETERRQ(1,0,"Negative column");} 182 else if (in[j] >= baij->N) {SETERRQ(1,0,"Col too large");} 183 #endif 184 else { 185 if (mat->was_assembled) { 186 if (!baij->colmap) { 187 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 188 } 189 col = baij->colmap[in[j]/bs] - 1 + in[j]%bs; 190 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 191 ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 192 col = 
in[j]; 193 } 194 } 195 else col = in[j]; 196 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 197 ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); 198 } 199 } 200 } 201 else { 202 if (roworiented && !baij->donotstash) { 203 ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 204 } 205 else { 206 if (!baij->donotstash) { 207 row = im[i]; 208 for ( j=0; j<n; j++ ) { 209 ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 210 } 211 } 212 } 213 } 214 } 215 return 0; 216 } 217 218 extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 219 #undef __FUNC__ 220 #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ" 221 int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 222 { 223 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 224 Scalar *value,*tmp; 225 int ierr,i,j,ii,jj,row,col,k,l; 226 int roworiented = baij->roworiented,rstart=baij->rstart ; 227 int rend=baij->rend,cstart=baij->cstart,stepval; 228 int cend=baij->cend,bs=baij->bs,bs2=baij->bs2; 229 230 /* Should be stashed somewhere to avoid multiple mallocs */ 231 tmp = (Scalar*) PetscMalloc(bs2*sizeof(Scalar)); CHKPTRQ(tmp); 232 if (roworiented) { 233 stepval = (n-1)*bs; 234 } else { 235 stepval = (m-1)*bs; 236 } 237 for ( i=0; i<m; i++ ) { 238 #if defined(PETSC_BOPT_g) 239 if (im[i] < 0) SETERRQ(1,0,"Negative row"); 240 if (im[i] >= baij->Mbs) SETERRQ(1,0,"Row too large"); 241 #endif 242 if (im[i] >= rstart && im[i] < rend) { 243 row = im[i] - rstart; 244 for ( j=0; j<n; j++ ) { 245 if (roworiented) { 246 value = v + i*(stepval+bs)*bs + j*bs; 247 } else { 248 value = v + j*(stepval+bs)*bs + i*bs; 249 } 250 for ( ii=0; ii<bs; ii++,value+=stepval ) 251 for (jj=0; jj<bs; jj++ ) 252 *tmp++ = *value++; 253 tmp -=bs2; 254 255 if (in[j] >= cstart && in[j] < cend){ 256 col = in[j] - cstart; 257 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,tmp,addv);CHKERRQ(ierr); 258 } 259 #if defined(PETSC_BOPT_g) 260 else if (in[j] < 0) {SETERRQ(1,0,"Negative column");} 261 else if (in[j] >= baij->Nbs) {SETERRQ(1,0,"Col too large");} 262 #endif 263 else { 264 if (mat->was_assembled) { 265 if (!baij->colmap) { 266 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 267 } 268 col = baij->colmap[in[j]] - 1; 269 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 270 ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 271 col = in[j]; 272 } 273 } 274 else col = in[j]; 275 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,tmp,addv);CHKERRQ(ierr); 276 } 277 } 278 } 279 else { 280 if (!baij->donotstash) { 281 if (roworiented ) { 282 row = im[i]*bs; 283 value = v + i*(stepval+bs)*bs; 284 for ( j=0; j<bs; j++,row++ ) { 285 for ( k=0; k<n; k++ ) { 286 for ( col=in[k]*bs,l=0; l<bs; l++,col++) { 287 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr); 288 } 289 } 290 } 291 } 292 else { 293 for ( j=0; j<n; j++ ) { 294 value = v + j*(stepval+bs)*bs + i*bs; 295 col = in[j]*bs; 296 for ( k=0; k<bs; k++,col++,value+=stepval) { 297 for ( row = im[i]*bs,l=0; l<bs; l++,row++) { 298 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr); 299 } 300 } 301 } 302 303 } 304 } 305 } 306 } 307 PetscFree(tmp); 308 return 0; 309 } 310 311 #undef __FUNC__ 312 #define __FUNC__ "MatGetValues_MPIBAIJ" 313 int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 314 { 315 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 316 int bs=baij->bs,ierr,i,j, 
bsrstart = baij->rstart*bs, bsrend = baij->rend*bs; 317 int bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col; 318 319 for ( i=0; i<m; i++ ) { 320 if (idxm[i] < 0) SETERRQ(1,0,"Negative row"); 321 if (idxm[i] >= baij->M) SETERRQ(1,0,"Row too large"); 322 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 323 row = idxm[i] - bsrstart; 324 for ( j=0; j<n; j++ ) { 325 if (idxn[j] < 0) SETERRQ(1,0,"Negative column"); 326 if (idxn[j] >= baij->N) SETERRQ(1,0,"Col too large"); 327 if (idxn[j] >= bscstart && idxn[j] < bscend){ 328 col = idxn[j] - bscstart; 329 ierr = MatGetValues(baij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 330 } 331 else { 332 if (!baij->colmap) { 333 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 334 } 335 if((baij->colmap[idxn[j]/bs]-1 < 0) || 336 (baij->garray[(baij->colmap[idxn[j]/bs]-1)/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 337 else { 338 col = (baij->colmap[idxn[j]/bs]-1) + idxn[j]%bs; 339 ierr = MatGetValues(baij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 340 } 341 } 342 } 343 } 344 else { 345 SETERRQ(1,0,"Only local values currently supported"); 346 } 347 } 348 return 0; 349 } 350 351 #undef __FUNC__ 352 #define __FUNC__ "MatNorm_MPIBAIJ" 353 int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm) 354 { 355 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 356 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data; 357 int ierr, i,bs2=baij->bs2; 358 double sum = 0.0; 359 Scalar *v; 360 361 if (baij->size == 1) { 362 ierr = MatNorm(baij->A,type,norm); CHKERRQ(ierr); 363 } else { 364 if (type == NORM_FROBENIUS) { 365 v = amat->a; 366 for (i=0; i<amat->nz*bs2; i++ ) { 367 #if defined(PETSC_COMPLEX) 368 sum += real(conj(*v)*(*v)); v++; 369 #else 370 sum += (*v)*(*v); v++; 371 #endif 372 } 373 v = bmat->a; 374 for (i=0; i<bmat->nz*bs2; i++ ) { 375 #if defined(PETSC_COMPLEX) 376 sum += real(conj(*v)*(*v)); v++; 377 #else 378 sum += (*v)*(*v); v++; 379 #endif 380 } 381 MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm); 382 *norm = sqrt(*norm); 383 } 384 else 385 SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet"); 386 } 387 return 0; 388 } 389 390 #undef __FUNC__ 391 #define __FUNC__ "MatAssemblyBegin_MPIBAIJ" 392 int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 393 { 394 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 395 MPI_Comm comm = mat->comm; 396 int size = baij->size, *owners = baij->rowners,bs=baij->bs; 397 int rank = baij->rank,tag = mat->tag, *owner,*starts,count,ierr; 398 MPI_Request *send_waits,*recv_waits; 399 int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 400 InsertMode addv; 401 Scalar *rvalues,*svalues; 402 403 /* make sure all processors are either in INSERTMODE or ADDMODE */ 404 MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm); 405 if (addv == (ADD_VALUES|INSERT_VALUES)) { 406 SETERRQ(1,0,"Some processors inserted others added"); 407 } 408 mat->insertmode = addv; /* in case this processor had no cache */ 409 410 /* first count number of contributors to each processor */ 411 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 412 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 413 owner = (int *) PetscMalloc( (baij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 414 for ( i=0; i<baij->stash.n; i++ ) { 415 idx = baij->stash.idx[i]; 416 for ( j=0; j<size; j++ ) { 417 if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 418 nprocs[j]++; procs[j] = 1; owner[i] = j; break; 419 } 420 } 421 } 422 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 423 
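  /*
     Illustrative note (added for clarity; the numbers are hypothetical):
     every stashed off-processor entry is shipped as a triple of Scalars
     (global row index, global column index, value).  For example, a stashed
     entry A(7,12) = 3.0 destined for processor p occupies three consecutive
     slots of the send buffer packed below,
         svalues[3*k]   = 7.0     (global row index stored as a Scalar)
         svalues[3*k+1] = 12.0    (global column index stored as a Scalar)
         svalues[3*k+2] = 3.0     (the value itself)
     which is why the receives posted below are 3*nmax Scalars long.
  */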
424 /* inform other processors of number of messages and max length*/ 425 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 426 MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm); 427 nreceives = work[rank]; 428 MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 429 nmax = work[rank]; 430 PetscFree(work); 431 432 /* post receives: 433 1) each message will consist of ordered pairs 434 (global index,value) we store the global index as a double 435 to simplify the message passing. 436 2) since we don't know how long each individual message is we 437 allocate the largest needed buffer for each receive. Potentially 438 this is a lot of wasted space. 439 440 441 This could be done better. 442 */ 443 rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar)); 444 CHKPTRQ(rvalues); 445 recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request)); 446 CHKPTRQ(recv_waits); 447 for ( i=0; i<nreceives; i++ ) { 448 MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag, 449 comm,recv_waits+i); 450 } 451 452 /* do sends: 453 1) starts[i] gives the starting index in svalues for stuff going to 454 the ith processor 455 */ 456 svalues = (Scalar *) PetscMalloc(3*(baij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 457 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 458 CHKPTRQ(send_waits); 459 starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 460 starts[0] = 0; 461 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 462 for ( i=0; i<baij->stash.n; i++ ) { 463 svalues[3*starts[owner[i]]] = (Scalar) baij->stash.idx[i]; 464 svalues[3*starts[owner[i]]+1] = (Scalar) baij->stash.idy[i]; 465 svalues[3*(starts[owner[i]]++)+2] = baij->stash.array[i]; 466 } 467 PetscFree(owner); 468 starts[0] = 0; 469 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 470 count = 0; 471 for ( i=0; i<size; i++ ) { 472 if (procs[i]) { 473 MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag, 474 comm,send_waits+count++); 475 } 476 } 477 PetscFree(starts); PetscFree(nprocs); 478 479 /* Free cache space */ 480 PLogInfo(mat,"MatAssemblyBegin_MPIBAIJ:Number of off-processor values %d\n",baij->stash.n); 481 ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr); 482 483 baij->svalues = svalues; baij->rvalues = rvalues; 484 baij->nsends = nsends; baij->nrecvs = nreceives; 485 baij->send_waits = send_waits; baij->recv_waits = recv_waits; 486 baij->rmax = nmax; 487 488 return 0; 489 } 490 491 492 #undef __FUNC__ 493 #define __FUNC__ "MatAssemblyEnd_MPIBAIJ" 494 int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 495 { 496 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 497 MPI_Status *send_status,recv_status; 498 int imdex,nrecvs = baij->nrecvs, count = nrecvs, i, n, ierr; 499 int bs=baij->bs,row,col,other_disassembled; 500 Scalar *values,val; 501 InsertMode addv = mat->insertmode; 502 503 /* wait on receives */ 504 while (count) { 505 MPI_Waitany(nrecvs,baij->recv_waits,&imdex,&recv_status); 506 /* unpack receives into our local space */ 507 values = baij->rvalues + 3*imdex*baij->rmax; 508 MPI_Get_count(&recv_status,MPIU_SCALAR,&n); 509 n = n/3; 510 for ( i=0; i<n; i++ ) { 511 row = (int) PetscReal(values[3*i]) - baij->rstart*bs; 512 col = (int) PetscReal(values[3*i+1]); 513 val = values[3*i+2]; 514 if (col >= baij->cstart*bs && col < baij->cend*bs) { 515 col -= baij->cstart*bs; 516 MatSetValues(baij->A,1,&row,1,&col,&val,addv); 517 } 518 else { 519 if (mat->was_assembled) { 520 if (!baij->colmap) 
{ 521 ierr = CreateColmap_MPIBAIJ_Private(mat); CHKERRQ(ierr); 522 } 523 col = (baij->colmap[col/bs]-1)*bs + col%bs; 524 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 525 ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 526 col = (int) PetscReal(values[3*i+1]); 527 } 528 } 529 MatSetValues(baij->B,1,&row,1,&col,&val,addv); 530 } 531 } 532 count--; 533 } 534 PetscFree(baij->recv_waits); PetscFree(baij->rvalues); 535 536 /* wait on sends */ 537 if (baij->nsends) { 538 send_status = (MPI_Status *) PetscMalloc(baij->nsends*sizeof(MPI_Status)); 539 CHKPTRQ(send_status); 540 MPI_Waitall(baij->nsends,baij->send_waits,send_status); 541 PetscFree(send_status); 542 } 543 PetscFree(baij->send_waits); PetscFree(baij->svalues); 544 545 ierr = MatAssemblyBegin(baij->A,mode); CHKERRQ(ierr); 546 ierr = MatAssemblyEnd(baij->A,mode); CHKERRQ(ierr); 547 548 /* determine if any processor has disassembled, if so we must 549 also disassemble ourselfs, in order that we may reassemble. */ 550 MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm); 551 if (mat->was_assembled && !other_disassembled) { 552 ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 553 } 554 555 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 556 ierr = MatSetUpMultiply_MPIBAIJ(mat); CHKERRQ(ierr); 557 } 558 ierr = MatAssemblyBegin(baij->B,mode); CHKERRQ(ierr); 559 ierr = MatAssemblyEnd(baij->B,mode); CHKERRQ(ierr); 560 561 if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;} 562 return 0; 563 } 564 565 #undef __FUNC__ 566 #define __FUNC__ "MatView_MPIBAIJ_Binary" 567 static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer) 568 { 569 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 570 int ierr; 571 572 if (baij->size == 1) { 573 ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 574 } 575 else SETERRQ(1,0,"Only uniprocessor output supported"); 576 return 0; 577 } 578 579 #undef __FUNC__ 580 #define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworMatlab" 581 static int MatView_MPIBAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer) 582 { 583 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 584 int ierr, format,rank,bs = baij->bs; 585 FILE *fd; 586 ViewerType vtype; 587 588 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 589 if (vtype == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) { 590 ierr = ViewerGetFormat(viewer,&format); 591 if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 592 MatInfo info; 593 MPI_Comm_rank(mat->comm,&rank); 594 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 595 ierr = MatGetInfo(mat,MAT_LOCAL,&info); 596 PetscSequentialPhaseBegin(mat->comm,1); 597 fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n", 598 rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs, 599 baij->bs,(int)info.memory); 600 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info); 601 fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs); 602 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info); 603 fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs); 604 fflush(fd); 605 PetscSequentialPhaseEnd(mat->comm,1); 606 ierr = VecScatterView(baij->Mvctx,viewer); CHKERRQ(ierr); 607 return 0; 608 } 609 else if (format == VIEWER_FORMAT_ASCII_INFO) { 610 PetscPrintf(mat->comm," block size is %d\n",bs); 611 return 0; 612 } 613 } 614 615 if (vtype == DRAW_VIEWER) { 616 Draw draw; 617 PetscTruth isnull; 618 ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr); 619 ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0; 620 } 621 622 if (vtype == 
ASCII_FILE_VIEWER) { 623 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 624 PetscSequentialPhaseBegin(mat->comm,1); 625 fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 626 baij->rank,baij->m,baij->rstart*bs,baij->rend*bs,baij->n, 627 baij->cstart*bs,baij->cend*bs); 628 ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 629 ierr = MatView(baij->B,viewer); CHKERRQ(ierr); 630 fflush(fd); 631 PetscSequentialPhaseEnd(mat->comm,1); 632 } 633 else { 634 int size = baij->size; 635 rank = baij->rank; 636 if (size == 1) { 637 ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 638 } 639 else { 640 /* assemble the entire matrix onto first processor. */ 641 Mat A; 642 Mat_SeqBAIJ *Aloc; 643 int M = baij->M, N = baij->N,*ai,*aj,row,col,i,j,k,*rvals; 644 int mbs=baij->mbs; 645 Scalar *a; 646 647 if (!rank) { 648 ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 649 CHKERRQ(ierr); 650 } 651 else { 652 ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 653 CHKERRQ(ierr); 654 } 655 PLogObjectParent(mat,A); 656 657 /* copy over the A part */ 658 Aloc = (Mat_SeqBAIJ*) baij->A->data; 659 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 660 row = baij->rstart; 661 rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals); 662 663 for ( i=0; i<mbs; i++ ) { 664 rvals[0] = bs*(baij->rstart + i); 665 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 666 for ( j=ai[i]; j<ai[i+1]; j++ ) { 667 col = (baij->cstart+aj[j])*bs; 668 for (k=0; k<bs; k++ ) { 669 ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 670 col++; a += bs; 671 } 672 } 673 } 674 /* copy over the B part */ 675 Aloc = (Mat_SeqBAIJ*) baij->B->data; 676 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 677 row = baij->rstart*bs; 678 for ( i=0; i<mbs; i++ ) { 679 rvals[0] = bs*(baij->rstart + i); 680 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 681 for ( j=ai[i]; j<ai[i+1]; j++ ) { 682 col = baij->garray[aj[j]]*bs; 683 for (k=0; k<bs; k++ ) { 684 ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 685 col++; a += bs; 686 } 687 } 688 } 689 PetscFree(rvals); 690 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 691 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 692 if (!rank) { 693 ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 694 } 695 ierr = MatDestroy(A); CHKERRQ(ierr); 696 } 697 } 698 return 0; 699 } 700 701 702 703 #undef __FUNC__ 704 #define __FUNC__ "MatView_MPIBAIJ" 705 int MatView_MPIBAIJ(PetscObject obj,Viewer viewer) 706 { 707 Mat mat = (Mat) obj; 708 int ierr; 709 ViewerType vtype; 710 711 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 712 if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER || 713 vtype == DRAW_VIEWER || vtype == MATLAB_VIEWER) { 714 ierr = MatView_MPIBAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 715 } 716 else if (vtype == BINARY_FILE_VIEWER) { 717 return MatView_MPIBAIJ_Binary(mat,viewer); 718 } 719 return 0; 720 } 721 722 #undef __FUNC__ 723 #define __FUNC__ "MatDestroy_MPIBAIJ" 724 int MatDestroy_MPIBAIJ(PetscObject obj) 725 { 726 Mat mat = (Mat) obj; 727 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 728 int ierr; 729 730 #if defined(PETSC_LOG) 731 PLogObjectState(obj,"Rows=%d, Cols=%d",baij->M,baij->N); 732 #endif 733 734 PetscFree(baij->rowners); 735 ierr = MatDestroy(baij->A); CHKERRQ(ierr); 736 ierr = MatDestroy(baij->B); CHKERRQ(ierr); 737 if (baij->colmap) PetscFree(baij->colmap); 738 if (baij->garray) PetscFree(baij->garray); 
739 if (baij->lvec) VecDestroy(baij->lvec); 740 if (baij->Mvctx) VecScatterDestroy(baij->Mvctx); 741 if (baij->rowvalues) PetscFree(baij->rowvalues); 742 PetscFree(baij); 743 if (mat->mapping) { 744 ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr); 745 } 746 PLogObjectDestroy(mat); 747 PetscHeaderDestroy(mat); 748 return 0; 749 } 750 751 #undef __FUNC__ 752 #define __FUNC__ "MatMult_MPIBAIJ" 753 int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 754 { 755 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 756 int ierr, nt; 757 758 VecGetLocalSize_Fast(xx,nt); 759 if (nt != a->n) { 760 SETERRQ(1,0,"Incompatible partition of A and xx"); 761 } 762 VecGetLocalSize_Fast(yy,nt); 763 if (nt != a->m) { 764 SETERRQ(1,0,"Incompatible partition of A and yy"); 765 } 766 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 767 ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr); 768 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 769 ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 770 ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 771 return 0; 772 } 773 774 #undef __FUNC__ 775 #define __FUNC__ "MatMultAdd_MPIBAIJ" 776 int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 777 { 778 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 779 int ierr; 780 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 781 ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 782 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 783 ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 784 return 0; 785 } 786 787 #undef __FUNC__ 788 #define __FUNC__ "MatMultTrans_MPIBAIJ" 789 int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy) 790 { 791 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 792 int ierr; 793 794 /* do nondiagonal part */ 795 ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 796 /* send it on its way */ 797 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 798 /* do local part */ 799 ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr); 800 /* receive remote parts: note this assumes the values are not actually */ 801 /* inserted in yy until the next line, which is true for my implementation*/ 802 /* but is perhaps not always true. */ 803 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 804 return 0; 805 } 806 807 #undef __FUNC__ 808 #define __FUNC__ "MatMultTransAdd_MPIBAIJ" 809 int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 810 { 811 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 812 int ierr; 813 814 /* do nondiagonal part */ 815 ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 816 /* send it on its way */ 817 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 818 /* do local part */ 819 ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 820 /* receive remote parts: note this assumes the values are not actually */ 821 /* inserted in zz until the next line, which is true for my implementation*/ 822 /* but is perhaps not always true. 
*/ 823 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 824 return 0; 825 } 826 827 /* 828 This only works correctly for square matrices where the subblock A->A is the 829 diagonal block 830 */ 831 #undef __FUNC__ 832 #define __FUNC__ "MatGetDiagonal_MPIBAIJ" 833 int MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 834 { 835 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 836 if (a->M != a->N) 837 SETERRQ(1,0,"Supports only square matrix where A->A is diag block"); 838 return MatGetDiagonal(a->A,v); 839 } 840 841 #undef __FUNC__ 842 #define __FUNC__ "MatScale_MPIBAIJ" 843 int MatScale_MPIBAIJ(Scalar *aa,Mat A) 844 { 845 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 846 int ierr; 847 ierr = MatScale(aa,a->A); CHKERRQ(ierr); 848 ierr = MatScale(aa,a->B); CHKERRQ(ierr); 849 return 0; 850 } 851 852 #undef __FUNC__ 853 #define __FUNC__ "MatGetSize_MPIBAIJ" 854 int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n) 855 { 856 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 857 *m = mat->M; *n = mat->N; 858 return 0; 859 } 860 861 #undef __FUNC__ 862 #define __FUNC__ "MatGetLocalSize_MPIBAIJ" 863 int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n) 864 { 865 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 866 *m = mat->m; *n = mat->N; 867 return 0; 868 } 869 870 #undef __FUNC__ 871 #define __FUNC__ "MatGetOwnershipRange_MPIBAIJ" 872 int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n) 873 { 874 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 875 *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs; 876 return 0; 877 } 878 879 extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 880 extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 881 882 #undef __FUNC__ 883 #define __FUNC__ "MatGetRow_MPIBAIJ" 884 int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 885 { 886 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 887 Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 888 int bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB; 889 int nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs; 890 int *cmap, *idx_p,cstart = mat->cstart; 891 892 if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,0,"Already active"); 893 mat->getrowactive = PETSC_TRUE; 894 895 if (!mat->rowvalues && (idx || v)) { 896 /* 897 allocate enough space to hold information from the longest row. 
898 */ 899 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data; 900 int max = 1,mbs = mat->mbs,tmp; 901 for ( i=0; i<mbs; i++ ) { 902 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 903 if (max < tmp) { max = tmp; } 904 } 905 mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar))); 906 CHKPTRQ(mat->rowvalues); 907 mat->rowindices = (int *) (mat->rowvalues + max*bs2); 908 } 909 910 911 if (row < brstart || row >= brend) SETERRQ(1,0,"Only local rows") 912 lrow = row - brstart; 913 914 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 915 if (!v) {pvA = 0; pvB = 0;} 916 if (!idx) {pcA = 0; if (!v) pcB = 0;} 917 ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 918 ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 919 nztot = nzA + nzB; 920 921 cmap = mat->garray; 922 if (v || idx) { 923 if (nztot) { 924 /* Sort by increasing column numbers, assuming A and B already sorted */ 925 int imark = -1; 926 if (v) { 927 *v = v_p = mat->rowvalues; 928 for ( i=0; i<nzB; i++ ) { 929 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 930 else break; 931 } 932 imark = i; 933 for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i]; 934 for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = vworkB[i]; 935 } 936 if (idx) { 937 *idx = idx_p = mat->rowindices; 938 if (imark > -1) { 939 for ( i=0; i<imark; i++ ) { 940 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 941 } 942 } else { 943 for ( i=0; i<nzB; i++ ) { 944 if (cmap[cworkB[i]/bs] < cstart) 945 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 946 else break; 947 } 948 imark = i; 949 } 950 for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart*bs + cworkA[i]; 951 for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 952 } 953 } 954 else { 955 if (idx) *idx = 0; 956 if (v) *v = 0; 957 } 958 } 959 *nz = nztot; 960 ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 961 ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 962 return 0; 963 } 964 965 #undef __FUNC__ 966 #define __FUNC__ "MatRestoreRow_MPIBAIJ" 967 int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 968 { 969 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 970 if (baij->getrowactive == PETSC_FALSE) { 971 SETERRQ(1,0,"MatGetRow not called"); 972 } 973 baij->getrowactive = PETSC_FALSE; 974 return 0; 975 } 976 977 #undef __FUNC__ 978 #define __FUNC__ "MatGetBlockSize_MPIBAIJ" 979 int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs) 980 { 981 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 982 *bs = baij->bs; 983 return 0; 984 } 985 986 #undef __FUNC__ 987 #define __FUNC__ "MatZeroEntries_MPIBAIJ" 988 int MatZeroEntries_MPIBAIJ(Mat A) 989 { 990 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 991 int ierr; 992 ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 993 ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 994 return 0; 995 } 996 997 #undef __FUNC__ 998 #define __FUNC__ "MatGetInfo_MPIBAIJ" 999 int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1000 { 1001 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) matin->data; 1002 Mat A = a->A, B = a->B; 1003 int ierr; 1004 double isend[5], irecv[5]; 1005 1006 info->rows_global = (double)a->M; 1007 info->columns_global = (double)a->N; 1008 info->rows_local = (double)a->m; 1009 info->columns_local = (double)a->N; 1010 info->block_size = (double)a->bs; 1011 ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr); 1012 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->memory; 1013 ierr = 
MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr); 1014 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->memory; 1015 if (flag == MAT_LOCAL) { 1016 info->nz_used = isend[0]; 1017 info->nz_allocated = isend[1]; 1018 info->memory = isend[2]; 1019 info->nz_unneeded = 0; /* not accumulated above */ 1020 info->mallocs = 0; 1021 } else if (flag == MAT_GLOBAL_MAX) { 1022 MPI_Allreduce(isend,irecv,3,MPI_DOUBLE,MPI_MAX,matin->comm); 1023 info->nz_used = irecv[0]; 1024 info->nz_allocated = irecv[1]; 1025 info->memory = irecv[2]; 1026 info->nz_unneeded = 0; 1027 info->mallocs = 0; 1028 } else if (flag == MAT_GLOBAL_SUM) { 1029 MPI_Allreduce(isend,irecv,3,MPI_DOUBLE,MPI_SUM,matin->comm); 1030 info->nz_used = irecv[0]; 1031 info->nz_allocated = irecv[1]; 1032 info->memory = irecv[2]; 1033 info->nz_unneeded = 0; 1034 info->mallocs = 0; 1035 } 1036 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1037 info->fill_ratio_needed = 0; 1038 info->factor_mallocs = 0; 1039 return 0; 1040 } 1041 1042 #undef __FUNC__ 1043 #define __FUNC__ "MatSetOption_MPIBAIJ" 1044 int MatSetOption_MPIBAIJ(Mat A,MatOption op) 1045 { 1046 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1047 1048 if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 1049 op == MAT_YES_NEW_NONZERO_LOCATIONS || 1050 op == MAT_COLUMNS_UNSORTED || 1051 op == MAT_COLUMNS_SORTED || 1052 op == MAT_NEW_NONZERO_LOCATION_ERROR) { 1053 MatSetOption(a->A,op); 1054 MatSetOption(a->B,op); 1055 } else if (op == MAT_ROW_ORIENTED) { 1056 a->roworiented = 1; 1057 MatSetOption(a->A,op); 1058 MatSetOption(a->B,op); 1059 } else if (op == MAT_ROWS_SORTED || 1060 op == MAT_ROWS_UNSORTED || 1061 op == MAT_SYMMETRIC || 1062 op == MAT_STRUCTURALLY_SYMMETRIC || 1063 op == MAT_YES_NEW_DIAGONALS) 1064 PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 1065 else if (op == MAT_COLUMN_ORIENTED) { 1066 a->roworiented = 0; 1067 MatSetOption(a->A,op); 1068 MatSetOption(a->B,op); 1069 } else if (op == MAT_IGNORE_OFF_PROCESSOR_ENTRIES) { 1070 a->donotstash = 1; 1071 } else if (op == MAT_NO_NEW_DIAGONALS) 1072 {SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");} 1073 else 1074 {SETERRQ(PETSC_ERR_SUP,0,"unknown option");} 1075 return 0; 1076 } 1077 1078 #undef __FUNC__ 1079 #define __FUNC__ "MatTranspose_MPIBAIJ" 1080 int MatTranspose_MPIBAIJ(Mat A,Mat *matout) 1081 { 1082 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data; 1083 Mat_SeqBAIJ *Aloc; 1084 Mat B; 1085 int ierr,M=baij->M,N=baij->N,*ai,*aj,row,i,*rvals,j,k,col; 1086 int bs=baij->bs,mbs=baij->mbs; 1087 Scalar *a; 1088 1089 if (matout == PETSC_NULL && M != N) 1090 SETERRQ(1,0,"Square matrix only for in-place"); 1091 ierr = MatCreateMPIBAIJ(A->comm,baij->bs,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,PETSC_NULL,&B); 1092 CHKERRQ(ierr); 1093 1094 /* copy over the A part */ 1095 Aloc = (Mat_SeqBAIJ*) baij->A->data; 1096 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1097 row = baij->rstart; 1098 rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals); 1099 1100 for ( i=0; i<mbs; i++ ) { 1101 rvals[0] = bs*(baij->rstart + i); 1102 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 1103 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1104 col = (baij->cstart+aj[j])*bs; 1105 for (k=0; k<bs; k++ ) { 1106 ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1107 col++; a += bs; 1108 } 1109 } 1110 } 1111 /* copy over the B part */ 1112 Aloc = (Mat_SeqBAIJ*) baij->B->data; 1113 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1114 row = baij->rstart*bs; 1115 for ( i=0; i<mbs; i++ ) { 1116 rvals[0] = bs*(baij->rstart 
+ i); 1117 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 1118 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1119 col = baij->garray[aj[j]]*bs; 1120 for (k=0; k<bs; k++ ) { 1121 ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1122 col++; a += bs; 1123 } 1124 } 1125 } 1126 PetscFree(rvals); 1127 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1128 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1129 1130 if (matout != PETSC_NULL) { 1131 *matout = B; 1132 } else { 1133 /* This isn't really an in-place transpose, but we free the data structures from baij */ 1134 PetscFree(baij->rowners); 1135 ierr = MatDestroy(baij->A); CHKERRQ(ierr); 1136 ierr = MatDestroy(baij->B); CHKERRQ(ierr); 1137 if (baij->colmap) PetscFree(baij->colmap); 1138 if (baij->garray) PetscFree(baij->garray); 1139 if (baij->lvec) VecDestroy(baij->lvec); 1140 if (baij->Mvctx) VecScatterDestroy(baij->Mvctx); 1141 PetscFree(baij); 1142 PetscMemcpy(A,B,sizeof(struct _Mat)); 1143 PetscHeaderDestroy(B); 1144 } 1145 return 0; 1146 } 1147 1148 #undef __FUNC__ 1149 #define __FUNC__ "MatDiagonalScale_MPIBAIJ" 1150 int MatDiagonalScale_MPIBAIJ(Mat A,Vec ll,Vec rr) 1151 { 1152 Mat a = ((Mat_MPIBAIJ *) A->data)->A; 1153 Mat b = ((Mat_MPIBAIJ *) A->data)->B; 1154 int ierr,s1,s2,s3; 1155 1156 if (ll) { 1157 ierr = VecGetLocalSize(ll,&s1); CHKERRQ(ierr); 1158 ierr = MatGetLocalSize(A,&s2,&s3); CHKERRQ(ierr); 1159 if (s1!=s2) SETERRQ(1,0,"non-conforming local sizes"); 1160 ierr = MatDiagonalScale(a,ll,0); CHKERRQ(ierr); 1161 ierr = MatDiagonalScale(b,ll,0); CHKERRQ(ierr); 1162 } 1163 if (rr) SETERRQ(1,0,"not supported for right vector"); 1164 return 0; 1165 } 1166 1167 /* the code does not do the diagonal entries correctly unless the 1168 matrix is square and the column and row ownerships are identical. 1169 This is a BUG. The only way to fix it seems to be to access 1170 baij->A and baij->B directly and not through the MatZeroRows() 1171 routine. 
1172 */ 1173 #undef __FUNC__ 1174 #define __FUNC__ "MatZeroRows_MPIBAIJ" 1175 int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag) 1176 { 1177 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 1178 int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 1179 int *procs,*nprocs,j,found,idx,nsends,*work; 1180 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 1181 int *rvalues,tag = A->tag,count,base,slen,n,*source; 1182 int *lens,imdex,*lrows,*values,bs=l->bs; 1183 MPI_Comm comm = A->comm; 1184 MPI_Request *send_waits,*recv_waits; 1185 MPI_Status recv_status,*send_status; 1186 IS istmp; 1187 1188 ierr = ISGetSize(is,&N); CHKERRQ(ierr); 1189 ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 1190 1191 /* first count number of contributors to each processor */ 1192 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 1193 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 1194 owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 1195 for ( i=0; i<N; i++ ) { 1196 idx = rows[i]; 1197 found = 0; 1198 for ( j=0; j<size; j++ ) { 1199 if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 1200 nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 1201 } 1202 } 1203 if (!found) SETERRQ(1,0,"Index out of range"); 1204 } 1205 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 1206 1207 /* inform other processors of number of messages and max length*/ 1208 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 1209 MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm); 1210 nrecvs = work[rank]; 1211 MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 1212 nmax = work[rank]; 1213 PetscFree(work); 1214 1215 /* post receives: */ 1216 rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */ 1217 CHKPTRQ(rvalues); 1218 recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request)); 1219 CHKPTRQ(recv_waits); 1220 for ( i=0; i<nrecvs; i++ ) { 1221 MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i); 1222 } 1223 1224 /* do sends: 1225 1) starts[i] gives the starting index in svalues for stuff going to 1226 the ith processor 1227 */ 1228 svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 1229 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 1230 CHKPTRQ(send_waits); 1231 starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 1232 starts[0] = 0; 1233 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 1234 for ( i=0; i<N; i++ ) { 1235 svalues[starts[owner[i]]++] = rows[i]; 1236 } 1237 ISRestoreIndices(is,&rows); 1238 1239 starts[0] = 0; 1240 for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 1241 count = 0; 1242 for ( i=0; i<size; i++ ) { 1243 if (procs[i]) { 1244 MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++); 1245 } 1246 } 1247 PetscFree(starts); 1248 1249 base = owners[rank]*bs; 1250 1251 /* wait on receives */ 1252 lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 1253 source = lens + nrecvs; 1254 count = nrecvs; slen = 0; 1255 while (count) { 1256 MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status); 1257 /* unpack receives into our local space */ 1258 MPI_Get_count(&recv_status,MPI_INT,&n); 1259 source[imdex] = recv_status.MPI_SOURCE; 1260 lens[imdex] = n; 1261 slen += n; 1262 count--; 1263 } 1264 PetscFree(recv_waits); 1265 1266 /* move the data into the send scatter */ 1267 lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 
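  /*
     Worked example of the conversion done in the loop below (the numbers are
     hypothetical): if this processor owns global rows 40..79, then base = 40,
     and receiving the global row numbers {41, 55} produces the local row
     indices lrows = {1, 15}, which are the rows actually zeroed in l->A and
     l->B (the local diagonal and off-diagonal blocks).
  */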
1268 count = 0; 1269 for ( i=0; i<nrecvs; i++ ) { 1270 values = rvalues + i*nmax; 1271 for ( j=0; j<lens[i]; j++ ) { 1272 lrows[count++] = values[j] - base; 1273 } 1274 } 1275 PetscFree(rvalues); PetscFree(lens); 1276 PetscFree(owner); PetscFree(nprocs); 1277 1278 /* actually zap the local rows */ 1279 ierr = ISCreateGeneral(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 1280 PLogObjectParent(A,istmp); 1281 PetscFree(lrows); 1282 ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 1283 ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 1284 ierr = ISDestroy(istmp); CHKERRQ(ierr); 1285 1286 /* wait on sends */ 1287 if (nsends) { 1288 send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status)); 1289 CHKPTRQ(send_status); 1290 MPI_Waitall(nsends,send_waits,send_status); 1291 PetscFree(send_status); 1292 } 1293 PetscFree(send_waits); PetscFree(svalues); 1294 1295 return 0; 1296 } 1297 extern int MatPrintHelp_SeqBAIJ(Mat); 1298 #undef __FUNC__ 1299 #define __FUNC__ "MatPrintHelp_MPIBAIJ" 1300 int MatPrintHelp_MPIBAIJ(Mat A) 1301 { 1302 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 1303 1304 if (!a->rank) return MatPrintHelp_SeqBAIJ(a->A); 1305 else return 0; 1306 } 1307 1308 #undef __FUNC__ 1309 #define __FUNC__ "MatSetUnfactored_MPIBAIJ" 1310 int MatSetUnfactored_MPIBAIJ(Mat A) 1311 { 1312 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 1313 int ierr; 1314 ierr = MatSetUnfactored(a->A); CHKERRQ(ierr); 1315 return 0; 1316 } 1317 1318 static int MatConvertSameType_MPIBAIJ(Mat,Mat *,int); 1319 1320 /* -------------------------------------------------------------------*/ 1321 static struct _MatOps MatOps = { 1322 MatSetValues_MPIBAIJ,MatGetRow_MPIBAIJ,MatRestoreRow_MPIBAIJ,MatMult_MPIBAIJ, 1323 MatMultAdd_MPIBAIJ,MatMultTrans_MPIBAIJ,MatMultTransAdd_MPIBAIJ,0, 1324 0,0,0,0, 1325 0,0,MatTranspose_MPIBAIJ,MatGetInfo_MPIBAIJ, 1326 0,MatGetDiagonal_MPIBAIJ,MatDiagonalScale_MPIBAIJ,MatNorm_MPIBAIJ, 1327 MatAssemblyBegin_MPIBAIJ,MatAssemblyEnd_MPIBAIJ,0,MatSetOption_MPIBAIJ, 1328 MatZeroEntries_MPIBAIJ,MatZeroRows_MPIBAIJ,0, 1329 0,0,0,MatGetSize_MPIBAIJ, 1330 MatGetLocalSize_MPIBAIJ,MatGetOwnershipRange_MPIBAIJ,0,0, 1331 0,0,MatConvertSameType_MPIBAIJ,0,0, 1332 0,0,0,MatGetSubMatrices_MPIBAIJ, 1333 MatIncreaseOverlap_MPIBAIJ,MatGetValues_MPIBAIJ,0,MatPrintHelp_MPIBAIJ, 1334 MatScale_MPIBAIJ,0,0,0,MatGetBlockSize_MPIBAIJ, 1335 0,0,0,0,0,0,MatSetUnfactored_MPIBAIJ,0,MatSetValuesBlocked_MPIBAIJ}; 1336 1337 1338 #undef __FUNC__ 1339 #define __FUNC__ "MatCreateMPIBAIJ" 1340 /*@C 1341 MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 1342 (block compressed row). For good matrix assembly performance 1343 the user should preallocate the matrix storage by setting the parameters 1344 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1345 performance can be increased by more than a factor of 50. 1346 1347 Input Parameters: 1348 . comm - MPI communicator 1349 . bs - size of blockk 1350 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1351 This value should be the same as the local size used in creating the 1352 y vector for the matrix-vector product y = Ax. 1353 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1354 This value should be the same as the local size used in creating the 1355 x vector for the matrix-vector product y = Ax. 1356 . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 1357 . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 1358 . 
d_nz - number of block nonzeros per block row in the diagonal portion of the local 1359 submatrix (same for all local rows) 1360 . d_nnz - array containing the number of block nonzeros in the various block rows 1361 in the diagonal portion of the local submatrix (possibly different for each block 1362 row) or PETSC_NULL. You must leave room for the diagonal entry even if 1363 it is zero. 1364 . o_nz - number of block nonzeros per block row in the off-diagonal portion of the local 1365 submatrix (same for all local rows). 1366 . o_nnz - array containing the number of block nonzeros in the various block rows of the 1367 off-diagonal portion of the local submatrix (possibly different for 1368 each block row) or PETSC_NULL. 1369 1370 Output Parameter: 1371 . A - the matrix 1372 1373 Notes: 1374 The user MUST specify either the local or global matrix dimensions 1375 (possibly both). 1376 1377 Storage Information: 1378 For a square global matrix we define each processor's diagonal portion 1379 to be its local rows and the corresponding columns (a square submatrix); 1380 each processor's off-diagonal portion encompasses the remainder of the 1381 local matrix (a rectangular submatrix). 1382 1383 The user can specify preallocated storage for the diagonal part of 1384 the local submatrix with either d_nz or d_nnz (not both). Set 1385 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1386 memory allocation. Likewise, specify preallocated storage for the 1387 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1388 1389 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1390 the figure below we depict these three local rows and all columns (0-11). 1391 1392 $ 0 1 2 3 4 5 6 7 8 9 10 11 1393 $ ------------------- 1394 $ row 3 | o o o d d d o o o o o o 1395 $ row 4 | o o o d d d o o o o o o 1396 $ row 5 | o o o d d d o o o o o o 1397 $ ------------------- 1398 $ 1399 1400 Thus, any entries in the d locations are stored in the d (diagonal) 1401 submatrix, and any entries in the o locations are stored in the 1402 o (off-diagonal) submatrix. Note that the d and the o submatrices are 1403 stored simply in the MATSEQBAIJ format for compressed row storage. 1404 1405 Now d_nz should indicate the number of block nonzeros per block row in the d matrix, 1406 and o_nz should indicate the number of block nonzeros per block row in the o matrix. 1407 In general, for PDE problems in which most nonzeros are near the diagonal, 1408 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1409 or you will get TERRIBLE performance; see the users' manual chapter on 1410 matrices. 
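   Example Usage:
   A sketch of a typical call (the sizes and preallocation counts below are
   hypothetical, chosen only to illustrate the argument order):

$      Mat A;
$      int ierr, bs = 2, m = 100, n = 100;
$      ierr = MatCreateMPIBAIJ(MPI_COMM_WORLD,bs,m,n,PETSC_DECIDE,PETSC_DECIDE,
$                              5,PETSC_NULL,2,PETSC_NULL,&A); CHKERRQ(ierr);

   Here each processor contributes 100 rows and 100 columns (both divisible by
   the block size 2), PETSc computes the global sizes, and storage is
   preallocated for 5 diagonal and 2 off-diagonal block nonzeros per block row.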
1411 1412 .keywords: matrix, block, aij, compressed row, sparse, parallel 1413 1414 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues() 1415 @*/ 1416 int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N, 1417 int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 1418 { 1419 Mat B; 1420 Mat_MPIBAIJ *b; 1421 int ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size; 1422 1423 if (bs < 1) SETERRQ(1,0,"invalid block size specified"); 1424 *A = 0; 1425 PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATMPIBAIJ,comm); 1426 PLogObjectCreate(B); 1427 B->data = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b); 1428 PetscMemzero(b,sizeof(Mat_MPIBAIJ)); 1429 PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 1430 MPI_Comm_size(comm,&size); 1431 if (size == 1) { 1432 B->ops.getrowij = MatGetRowIJ_MPIBAIJ; 1433 B->ops.restorerowij = MatRestoreRowIJ_MPIBAIJ; 1434 B->ops.lufactorsymbolic = MatLUFactorSymbolic_MPIBAIJ; 1435 B->ops.lufactornumeric = MatLUFactorNumeric_MPIBAIJ; 1436 B->ops.lufactor = MatLUFactor_MPIBAIJ; 1437 B->ops.solve = MatSolve_MPIBAIJ; 1438 B->ops.solveadd = MatSolveAdd_MPIBAIJ; 1439 B->ops.solvetrans = MatSolveTrans_MPIBAIJ; 1440 B->ops.solvetransadd = MatSolveTransAdd_MPIBAIJ; 1441 B->ops.ilufactorsymbolic = MatILUFactorSymbolic_MPIBAIJ; 1442 } 1443 1444 B->destroy = MatDestroy_MPIBAIJ; 1445 B->view = MatView_MPIBAIJ; 1446 B->mapping = 0; 1447 B->factor = 0; 1448 B->assembled = PETSC_FALSE; 1449 1450 B->insertmode = NOT_SET_VALUES; 1451 MPI_Comm_rank(comm,&b->rank); 1452 MPI_Comm_size(comm,&b->size); 1453 1454 if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) 1455 SETERRQ(1,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 1456 if ( M == PETSC_DECIDE && m == PETSC_DECIDE) SETERRQ(1,0,"either M or m should be specified"); 1457 if ( M == PETSC_DECIDE && n == PETSC_DECIDE)SETERRQ(1,0,"either N or n should be specified"); 1458 if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE; 1459 if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE; 1460 1461 if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 1462 work[0] = m; work[1] = n; 1463 mbs = m/bs; nbs = n/bs; 1464 MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm ); 1465 if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;} 1466 if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;} 1467 } 1468 if (m == PETSC_DECIDE) { 1469 Mbs = M/bs; 1470 if (Mbs*bs != M) SETERRQ(1,0,"No of global rows must be divisible by blocksize"); 1471 mbs = Mbs/b->size + ((Mbs % b->size) > b->rank); 1472 m = mbs*bs; 1473 } 1474 if (n == PETSC_DECIDE) { 1475 Nbs = N/bs; 1476 if (Nbs*bs != N) SETERRQ(1,0,"No of global cols must be divisible by blocksize"); 1477 nbs = Nbs/b->size + ((Nbs % b->size) > b->rank); 1478 n = nbs*bs; 1479 } 1480 if (mbs*bs != m || nbs*bs != n) SETERRQ(1,0,"No of local rows, cols must be divisible by blocksize"); 1481 1482 b->m = m; B->m = m; 1483 b->n = n; B->n = n; 1484 b->N = N; B->N = N; 1485 b->M = M; B->M = M; 1486 b->bs = bs; 1487 b->bs2 = bs*bs; 1488 b->mbs = mbs; 1489 b->nbs = nbs; 1490 b->Mbs = Mbs; 1491 b->Nbs = Nbs; 1492 1493 /* build local table of row and column ownerships */ 1494 b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners); 1495 PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIBAIJ)); 1496 b->cowners = b->rowners + b->size + 2; 1497 MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm); 1498 b->rowners[0] = 0; 1499 for ( i=2; i<=b->size; i++ ) { 1500 b->rowners[i] += b->rowners[i-1]; 1501 } 1502 b->rstart = 
b->rowners[b->rank]; 1503 b->rend = b->rowners[b->rank+1]; 1504 b->rstart_bs = b->rstart * bs; 1505 b->rend_bs = b->rend * bs; 1506 1507 MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm); 1508 b->cowners[0] = 0; 1509 for ( i=2; i<=b->size; i++ ) { 1510 b->cowners[i] += b->cowners[i-1]; 1511 } 1512 b->cstart = b->cowners[b->rank]; 1513 b->cend = b->cowners[b->rank+1]; 1514 b->cstart_bs = b->cstart * bs; 1515 b->cend_bs = b->cend * bs; 1516 1517 if (d_nz == PETSC_DEFAULT) d_nz = 5; 1518 ierr = MatCreateSeqBAIJ(MPI_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr); 1519 PLogObjectParent(B,b->A); 1520 if (o_nz == PETSC_DEFAULT) o_nz = 0; 1521 ierr = MatCreateSeqBAIJ(MPI_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr); 1522 PLogObjectParent(B,b->B); 1523 1524 /* build cache for off array entries formed */ 1525 ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr); 1526 b->donotstash = 0; 1527 b->colmap = 0; 1528 b->garray = 0; 1529 b->roworiented = 1; 1530 1531 /* stuff used for matrix vector multiply */ 1532 b->lvec = 0; 1533 b->Mvctx = 0; 1534 1535 /* stuff for MatGetRow() */ 1536 b->rowindices = 0; 1537 b->rowvalues = 0; 1538 b->getrowactive = PETSC_FALSE; 1539 1540 *A = B; 1541 return 0; 1542 } 1543 1544 #undef __FUNC__ 1545 #define __FUNC__ "MatConvertSameType_MPIBAIJ" 1546 static int MatConvertSameType_MPIBAIJ(Mat matin,Mat *newmat,int cpvalues) 1547 { 1548 Mat mat; 1549 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data; 1550 int ierr, len=0, flg; 1551 1552 *newmat = 0; 1553 PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIBAIJ,matin->comm); 1554 PLogObjectCreate(mat); 1555 mat->data = (void *) (a = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(a); 1556 PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 1557 mat->destroy = MatDestroy_MPIBAIJ; 1558 mat->view = MatView_MPIBAIJ; 1559 mat->factor = matin->factor; 1560 mat->assembled = PETSC_TRUE; 1561 1562 a->m = mat->m = oldmat->m; 1563 a->n = mat->n = oldmat->n; 1564 a->M = mat->M = oldmat->M; 1565 a->N = mat->N = oldmat->N; 1566 1567 a->bs = oldmat->bs; 1568 a->bs2 = oldmat->bs2; 1569 a->mbs = oldmat->mbs; 1570 a->nbs = oldmat->nbs; 1571 a->Mbs = oldmat->Mbs; 1572 a->Nbs = oldmat->Nbs; 1573 1574 a->rstart = oldmat->rstart; 1575 a->rend = oldmat->rend; 1576 a->cstart = oldmat->cstart; 1577 a->cend = oldmat->cend; 1578 a->size = oldmat->size; 1579 a->rank = oldmat->rank; 1580 mat->insertmode = NOT_SET_VALUES; 1581 a->rowvalues = 0; 1582 a->getrowactive = PETSC_FALSE; 1583 1584 a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 1585 PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIBAIJ)); 1586 a->cowners = a->rowners + a->size + 2; 1587 PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int)); 1588 ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr); 1589 if (oldmat->colmap) { 1590 a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap); 1591 PLogObjectMemory(mat,(a->Nbs)*sizeof(int)); 1592 PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int)); 1593 } else a->colmap = 0; 1594 if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) { 1595 a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray); 1596 PLogObjectMemory(mat,len*sizeof(int)); 1597 PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); 1598 } else a->garray = 0; 1599 1600 ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 1601 PLogObjectParent(mat,a->lvec); 1602 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 1603 
PLogObjectParent(mat,a->Mvctx); 1604 ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr); 1605 PLogObjectParent(mat,a->A); 1606 ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr); 1607 PLogObjectParent(mat,a->B); 1608 ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr); 1609 if (flg) { 1610 ierr = MatPrintHelp(mat); CHKERRQ(ierr); 1611 } 1612 *newmat = mat; 1613 return 0; 1614 } 1615 1616 #include "sys.h" 1617 1618 #undef __FUNC__ 1619 #define __FUNC__ "MatLoad_MPIBAIJ" 1620 int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat) 1621 { 1622 Mat A; 1623 int i, nz, ierr, j,rstart, rend, fd; 1624 Scalar *vals,*buf; 1625 MPI_Comm comm = ((PetscObject)viewer)->comm; 1626 MPI_Status status; 1627 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 1628 int *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf; 1629 int flg,tag = ((PetscObject)viewer)->tag,bs=1,bs2,Mbs,mbs,extra_rows; 1630 int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 1631 int dcount,kmax,k,nzcount,tmp; 1632 1633 1634 ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr); 1635 bs2 = bs*bs; 1636 1637 MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 1638 if (!rank) { 1639 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 1640 ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr); 1641 if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not matrix object"); 1642 } 1643 1644 MPI_Bcast(header+1,3,MPI_INT,0,comm); 1645 M = header[1]; N = header[2]; 1646 1647 if (M != N) SETERRQ(1,0,"Can only do square matrices"); 1648 1649 /* 1650 This code adds extra rows to make sure the number of rows is 1651 divisible by the blocksize 1652 */ 1653 Mbs = M/bs; 1654 extra_rows = bs - M + bs*(Mbs); 1655 if (extra_rows == bs) extra_rows = 0; 1656 else Mbs++; 1657 if (extra_rows &&!rank) { 1658 PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n"); 1659 } 1660 1661 /* determine ownership of all rows */ 1662 mbs = Mbs/size + ((Mbs % size) > rank); 1663 m = mbs * bs; 1664 rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int)); CHKPTRQ(rowners); 1665 browners = rowners + size + 1; 1666 MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm); 1667 rowners[0] = 0; 1668 for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1]; 1669 for ( i=0; i<=size; i++ ) browners[i] = rowners[i]*bs; 1670 rstart = rowners[rank]; 1671 rend = rowners[rank+1]; 1672 1673 /* distribute row lengths to all processors */ 1674 locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) ); CHKPTRQ(locrowlens); 1675 if (!rank) { 1676 rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) ); CHKPTRQ(rowlengths); 1677 ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr); 1678 for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 1679 sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 1680 for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i]; 1681 MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm); 1682 PetscFree(sndcounts); 1683 } 1684 else { 1685 MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm); 1686 } 1687 1688 if (!rank) { 1689 /* calculate the number of nonzeros on each processor */ 1690 procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 1691 PetscMemzero(procsnz,size*sizeof(int)); 1692 for ( i=0; i<size; i++ ) { 1693 for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) { 1694 procsnz[i] += rowlengths[j]; 1695 } 1696 } 
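  /*
     Worked example of the block-size padding above (the numbers are
     hypothetical): a 10 x 10 matrix loaded with -matload_block_size 4 gives
     Mbs = 10/4 = 2 and extra_rows = 4 - 10 + 4*2 = 2, so Mbs is bumped to 3
     and the matrix is treated as 12 x 12; each of the 2 padding rows receives
     a single unit entry on the diagonal (rowlengths[M+i] = 1, column M+i,
     value 1.0 in the column/value handling below).
  */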
  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
    PetscMemzero(procsnz,size*sizeof(int));
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]*bs; j<rowners[i+1]*bs; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    PetscFree(rowlengths);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);

    /* read in my part of the matrix column indices */
    nz     = procsnz[0];
    ibuf   = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
    mycols = ibuf;
    if (size == 1) nz -= extra_rows;
    ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr);
    if (size == 1) for ( i=0; i<extra_rows; i++ ) { mycols[nz+i] = M+i; }

    /* read in the column indices for all other processors (except the last) and ship them off */
    for ( i=1; i<size-1; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
      MPI_Send(cols,nz,MPI_INT,i,tag,comm);
    }
    /* read in the stuff for the last proc */
    if (size != 1) {
      nz = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
      ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
      for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i;
      MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);
    }
    PetscFree(cols);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += locrowlens[i];
    }
    ibuf   = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
    mycols = ibuf;
    /* receive message of column indices */
    MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
    MPI_Get_count(&status,MPI_INT,&maxnz);
    if (maxnz != nz) SETERRQ(1,0,"something is wrong with file");
  }

  /* loop over the local block rows, determining the number of diagonal and off-diagonal block entries */
  dlens  = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) ); CHKPTRQ(dlens);
  odlens = dlens + (rend-rstart);
  mask   = (int *) PetscMalloc( 3*Mbs*sizeof(int) ); CHKPTRQ(mask);
  PetscMemzero(mask,3*Mbs*sizeof(int));
  masked1  = mask + Mbs;
  masked2  = masked1 + Mbs;
  rowcount = 0; nzcount = 0;
  for ( i=0; i<mbs; i++ ) {
    dcount  = 0;
    odcount = 0;
    for ( j=0; j<bs; j++ ) {
      kmax = locrowlens[rowcount];
      for ( k=0; k<kmax; k++ ) {
        tmp = mycols[nzcount++]/bs;
        if (!mask[tmp]) {
          mask[tmp] = 1;
          if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
          else                             masked1[dcount++]  = tmp;
        }
      }
      rowcount++;
    }

    dlens[i]  = dcount;
    odlens[i] = odcount;

    /* zero out the mask elements we set */
    for ( j=0; j<dcount; j++ )  mask[masked1[j]] = 0;
    for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0;
  }

  /* create our matrix */
  ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);
  CHKERRQ(ierr);
  A = *newmat;
  MatSetOption(A,MAT_COLUMNS_SORTED);

  if (!rank) {
    buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(buf);
    /* read in my part of the matrix numerical values */
    nz     = procsnz[0];
    vals   = buf;
    mycols = ibuf;
    if (size == 1) nz -= extra_rows;
    ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
    if (size == 1) for ( i=0; i<extra_rows; i++ ) { vals[nz+i] = 1.0; }

    /* insert into matrix */
    jj = rstart*bs;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES); CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }

    /* read in the parts for all other processors (except the last one) and ship them out */
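    /*
       The value stream mirrors the column-index stream read above: one
       contiguous block of Scalars per process, sized by procsnz[].  The
       last process is handled separately because the padded rows are not
       on disk; each padded row receives a single value of 1.0, placing 1's
       on the diagonal of the padding so it does not introduce singularity.
    */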
    for ( i=1; i<size-1; i++ ) {
      nz   = procsnz[i];
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
      MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
    }
    /* the last proc */
    if (size != 1) {
      nz   = procsnz[size-1] - extra_rows;
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
      for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0;
      MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);
    }
    PetscFree(procsnz);
  } else {
    /* receive numeric values */
    buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(buf);

    /* receive message of values */
    vals   = buf;
    mycols = ibuf;
    MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
    MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
    if (maxnz != nz) SETERRQ(1,0,"something is wrong with file");

    /* insert into matrix */
    jj = rstart*bs;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES); CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
  }
  PetscFree(locrowlens);
  PetscFree(buf);
  PetscFree(ibuf);
  PetscFree(rowners);
  PetscFree(dlens);
  PetscFree(mask);
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  return 0;
}
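/*
   Usage sketch for MatLoad_MPIBAIJ() -- a minimal example, not part of this
   file, assuming the binary viewer interface of this PETSc generation
   (ViewerFileOpenBinary()) and a file "matrix.dat" previously written with
   MatView() on a binary viewer; error checking abbreviated:

       Viewer viewer;
       Mat    A;
       ierr = ViewerFileOpenBinary(MPI_COMM_WORLD,"matrix.dat",BINARY_RDONLY,&viewer);
       ierr = MatLoad(viewer,MATMPIBAIJ,&A); CHKERRQ(ierr);
       ierr = ViewerDestroy(viewer); CHKERRQ(ierr);

   The block size used while loading is taken from the runtime option
   -matload_block_size (default 1); when the global dimension is not
   divisible by the block size, the matrix is padded with identity rows as
   implemented above.
*/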